//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
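
  // Illustrative sketch of the dispatch emitted for an untied task (not
  // emitted verbatim): Enter() switches on the part id loaded from *PartIDVar
  // so the body resumes at the last recorded scheduling point, and
  // emitUntiedSwitch() records the next resumption point before the task is
  // re-enqueued by UntiedCodeGen:
  //
  //   switch (*part_id) {      // default: -> .untied.done. -> return
  //   case 0: ...              // start of the task body
  //   case 1: ...              // resume after the first scheduling point
  //   }
  //   // at each scheduling point:
  //   //   *part_id = <next case>; <re-enqueue task>; return;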
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
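/// An illustrative psource value (the format matches what
/// getIdentStringFromSourceLocation below produces; the actual path and names
/// depend on the translation unit):
///   ";/path/to/file.c;main;12;3;;"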
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
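/// For example (illustrative): 'schedule(static, 4)' selects
/// OMP_sch_static_chunked and passes the chunk value 4 to the runtime, while
/// plain 'schedule(static)' selects OMP_sch_static.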
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre- and post-actions in an advanced codegen sequence
/// for an OpenMP region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
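/// For example (illustrative), given
///   #pragma omp declare reduction(mymin : int : omp_out = omp_out < omp_in ? omp_out : omp_in)
/// the ReductionOp of a 'reduction(mymin : x)' clause is a CallExpr whose
/// callee is an OpaqueValueExpr referring back to the OMPDeclareReductionDecl,
/// which is the shape the check below looks for.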
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
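/// A rough sketch of the emitted control flow (illustrative only):
///   if (DestBegin == DestEnd) goto omp.arrayinit.done;  // isempty check
///   omp.arrayinit.body:
///     ; PHI over the current dest (and src, for UDRs) element
///     ; emit Init into the current element
///     ; advance the element pointers; loop until DestEnd is reached
///   omp.arrayinit.done: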
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false,
                                     hasRequiresUnifiedSharedMemory(),
                                     CGM.getLangOpts().OpenMPOffloadMandatory);
  OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsDevice
                            ? CGM.getLangOpts().OMPHostIRFile
                            : StringRef{});
  OMPBuilder.setConfig(Config);
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

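/// Emits the helper function used for a user-defined reduction combiner or
/// initializer. Illustrative shape of the produced helper (the actual name
/// comes from getName() above; "omp_out"/"omp_in" are mapped onto the
/// restrict-qualified pointer parameters, as the comments in the body note):
///   void .omp_combiner.(Ty *omp_out_parm, Ty *omp_in_parm) { <expression>; }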
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}
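
// For example (illustrative): for an enclosing function 'foo' this typically
// produces "foo.omp_outlined"; the separator is platform specific and chosen
// by createPlatformSpecificName().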

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

1403 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1404 SourceLocation Loc) {
1405 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1406 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1407 // the clang invariants used below might be broken.
1408 if (CGM.getLangOpts().OpenMPIRBuilder) {
1409 SmallString<128> Buffer;
1410 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1411 uint32_t SrcLocStrSize;
1412 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1413 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1414 return OMPBuilder.getOrCreateThreadID(
1415 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1418 llvm::Value *ThreadID = nullptr;
1419 // Check whether we've already cached a load of the thread id in this
1420 // function.
1421 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1422 if (I != OpenMPLocThreadIDMap.end()) {
1423 ThreadID = I->second.ThreadID;
1424 if (ThreadID != nullptr)
1425 return ThreadID;
1427 // If exceptions are enabled, do not use parameter to avoid possible crash.
1428 if (auto *OMPRegionInfo =
1429 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1430 if (OMPRegionInfo->getThreadIDVariable()) {
1431 // Check if this an outlined function with thread id passed as argument.
1432 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1433 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1434 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1435 !CGF.getLangOpts().CXXExceptions ||
1436 CGF.Builder.GetInsertBlock() == TopBlock ||
1437 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1438 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1439 TopBlock ||
1440 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1441 CGF.Builder.GetInsertBlock()) {
1442 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1443 // If value loaded in entry block, cache it and use it everywhere in
1444 // function.
1445 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1446 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1447 Elem.second.ThreadID = ThreadID;
1449 return ThreadID;
1454 // This is not an outlined function region - need to call kmp_int32
1455 // __kmpc_global_thread_num(ident_t *loc).
1456 // Generate thread id value and cache this value for use across the
1457 // function.
1458 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1459 if (!Elem.second.ServiceInsertPt)
1460 setLocThreadIdInsertPt(CGF);
1461 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1462 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1463 llvm::CallInst *Call = CGF.Builder.CreateCall(
1464 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1465 OMPRTL___kmpc_global_thread_num),
1466 emitUpdateLocation(CGF, Loc));
1467 Call->setCallingConv(CGF.getRuntimeCC());
1468 Elem.second.ThreadID = Call;
1469 return Call;
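// In IR terms, the uncached path above reduces to a single call (sketch):
//
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
//
// emitted at the function's service insertion point, so later queries in the
// same function reuse the cached %gtid instead of re-asking the runtime.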
1472 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1473 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1474 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1475 clearLocThreadIdInsertPt(CGF);
1476 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1478 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1479 for (const auto *D : FunctionUDRMap[CGF.CurFn])
1480 UDRMap.erase(D);
1481 FunctionUDRMap.erase(CGF.CurFn);
1483 auto I = FunctionUDMMap.find(CGF.CurFn);
1484 if (I != FunctionUDMMap.end()) {
1485 for (const auto *D : I->second)
1486 UDMMap.erase(D);
1487 FunctionUDMMap.erase(I);
1489 LastprivateConditionalToTypes.erase(CGF.CurFn);
1490 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1493 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1494 return OMPBuilder.IdentPtr;
1497 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1498 if (!Kmpc_MicroTy) {
1499 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1500 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1501 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1502 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1504 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1507 llvm::FunctionCallee
1508 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1509 bool IsGPUDistribute) {
1510 assert((IVSize == 32 || IVSize == 64) &&
1511 "IV size is not compatible with the omp runtime");
1512 StringRef Name;
1513 if (IsGPUDistribute)
1514 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1515 : "__kmpc_distribute_static_init_4u")
1516 : (IVSigned ? "__kmpc_distribute_static_init_8"
1517 : "__kmpc_distribute_static_init_8u");
1518 else
1519 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1520 : "__kmpc_for_static_init_4u")
1521 : (IVSigned ? "__kmpc_for_static_init_8"
1522 : "__kmpc_for_static_init_8u");
1524 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1525 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1526 llvm::Type *TypeParams[] = {
1527 getIdentTyPointerTy(), // loc
1528 CGM.Int32Ty, // tid
1529 CGM.Int32Ty, // schedtype
1530 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1531 PtrTy, // p_lower
1532 PtrTy, // p_upper
1533 PtrTy, // p_stride
1534 ITy, // incr
1535 ITy // chunk
1537 auto *FnTy =
1538 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1539 return CGM.CreateRuntimeFunction(FnTy, Name);
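// For IVSize == 32 and IVSigned == true, the callee built above corresponds
// to this prototype (reconstructed from TypeParams; parameter names follow
// the inline comments):
//
//   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid,
//                                 kmp_int32 schedtype, kmp_int32 *p_lastiter,
//                                 kmp_int32 *p_lower, kmp_int32 *p_upper,
//                                 kmp_int32 *p_stride, kmp_int32 incr,
//                                 kmp_int32 chunk);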
1542 llvm::FunctionCallee
1543 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1544 assert((IVSize == 32 || IVSize == 64) &&
1545 "IV size is not compatible with the omp runtime");
1546 StringRef Name =
1547 IVSize == 32
1548 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1549 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1550 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1551 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1552 CGM.Int32Ty, // tid
1553 CGM.Int32Ty, // schedtype
1554 ITy, // lower
1555 ITy, // upper
1556 ITy, // stride
1557 ITy // chunk
1559 auto *FnTy =
1560 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1561 return CGM.CreateRuntimeFunction(FnTy, Name);
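// Reconstructed prototype for the 32-bit signed variant (from TypeParams):
//
//   void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 tid,
//                               kmp_int32 schedtype, kmp_int32 lower,
//                               kmp_int32 upper, kmp_int32 stride,
//                               kmp_int32 chunk);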
1564 llvm::FunctionCallee
1565 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1566 assert((IVSize == 32 || IVSize == 64) &&
1567 "IV size is not compatible with the omp runtime");
1568 StringRef Name =
1569 IVSize == 32
1570 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1571 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1572 llvm::Type *TypeParams[] = {
1573 getIdentTyPointerTy(), // loc
1574 CGM.Int32Ty, // tid
1576 auto *FnTy =
1577 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1578 return CGM.CreateRuntimeFunction(FnTy, Name);
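// Reconstructed prototype for the 32-bit signed variant (from TypeParams):
//
//   void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 tid);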
1581 llvm::FunctionCallee
1582 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1583 assert((IVSize == 32 || IVSize == 64) &&
1584 "IV size is not compatible with the omp runtime");
1585 StringRef Name =
1586 IVSize == 32
1587 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1588 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1589 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1590 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1591 llvm::Type *TypeParams[] = {
1592 getIdentTyPointerTy(), // loc
1593 CGM.Int32Ty, // tid
1594 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1595 PtrTy, // p_lower
1596 PtrTy, // p_upper
1597 PtrTy // p_stride
1599 auto *FnTy =
1600 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1601 return CGM.CreateRuntimeFunction(FnTy, Name);
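// Reconstructed prototype for the 32-bit signed variant (from TypeParams);
// the kmp_int32 result is nonzero while another chunk is available:
//
//   kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 tid,
//                                    kmp_int32 *p_lastiter,
//                                    kmp_int32 *p_lower, kmp_int32 *p_upper,
//                                    kmp_int32 *p_stride);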
1604 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1605 convertDeviceClause(const VarDecl *VD) {
1606 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1607 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1608 if (!DevTy)
1609 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1611 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1612 case OMPDeclareTargetDeclAttr::DT_Host:
1613 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1615 case OMPDeclareTargetDeclAttr::DT_NoHost:
1616 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1618 case OMPDeclareTargetDeclAttr::DT_Any:
1619 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1621 default:
1622 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1627 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1628 convertCaptureClause(const VarDecl *VD) {
1629 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1630 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1631 if (!MapType)
1632 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1633 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1634 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1635 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1637 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1638 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1640 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1641 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1643 default:
1644 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1649 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1650 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1651 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1653 auto FileInfoCallBack = [&]() {
1654 SourceManager &SM = CGM.getContext().getSourceManager();
1655 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1657 llvm::sys::fs::UniqueID ID;
1658 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1659 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1662 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1665 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1668 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1669 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1671 auto LinkageForVariable = [&VD, this]() {
1672 return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
1675 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1677 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1678 CGM.getContext().getPointerType(VD->getType()));
1679 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1680 convertCaptureClause(VD), convertDeviceClause(VD),
1681 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1682 VD->isExternallyVisible(),
1683 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1684 VD->getCanonicalDecl()->getBeginLoc()),
1685 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1686 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1687 LinkageForVariable);
1689 if (!addr)
1690 return Address::invalid();
1691 return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1694 llvm::Constant *
1695 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1696 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1697 !CGM.getContext().getTargetInfo().isTLSSupported());
1698 // Look up the entry, lazily creating it if necessary.
1699 std::string Suffix = getName({"cache", ""});
1700 return OMPBuilder.getOrCreateInternalVariable(
1701 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1704 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1705 const VarDecl *VD,
1706 Address VDAddr,
1707 SourceLocation Loc) {
1708 if (CGM.getLangOpts().OpenMPUseTLS &&
1709 CGM.getContext().getTargetInfo().isTLSSupported())
1710 return VDAddr;
1712 llvm::Type *VarTy = VDAddr.getElementType();
1713 llvm::Value *Args[] = {
1714 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1715 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1716 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1717 getOrCreateThreadPrivateCache(VD)};
1718 return Address(
1719 CGF.EmitRuntimeCall(
1720 OMPBuilder.getOrCreateRuntimeFunction(
1721 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1722 Args),
1723 CGF.Int8Ty, VDAddr.getAlignment());
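// The Args above line up with the runtime entry point, which returns the
// calling thread's private copy (assumed prototype, matching the argument
// order used here):
//
//   void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 tid,
//                                     void *data, size_t size, void ***cache);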
1726 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1727 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1728 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1729 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1730 // library.
1731 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1732 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1733 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1734 OMPLoc);
1735 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1736 // to register constructor/destructor for variable.
1737 llvm::Value *Args[] = {
1738 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1739 Ctor, CopyCtor, Dtor};
1740 CGF.EmitRuntimeCall(
1741 OMPBuilder.getOrCreateRuntimeFunction(
1742 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1743 Args);
1746 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1747 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1748 bool PerformInit, CodeGenFunction *CGF) {
1749 if (CGM.getLangOpts().OpenMPUseTLS &&
1750 CGM.getContext().getTargetInfo().isTLSSupported())
1751 return nullptr;
1753 VD = VD->getDefinition(CGM.getContext());
1754 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1755 QualType ASTTy = VD->getType();
1757 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1758 const Expr *Init = VD->getAnyInitializer();
1759 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1760 // Generate function that re-emits the declaration's initializer into the
1761 // threadprivate copy of the variable VD
1762 CodeGenFunction CtorCGF(CGM);
1763 FunctionArgList Args;
1764 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1765 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1766 ImplicitParamDecl::Other);
1767 Args.push_back(&Dst);
1769 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1770 CGM.getContext().VoidPtrTy, Args);
1771 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1772 std::string Name = getName({"__kmpc_global_ctor_", ""});
1773 llvm::Function *Fn =
1774 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1775 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1776 Args, Loc, Loc);
1777 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1778 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1779 CGM.getContext().VoidPtrTy, Dst.getLocation());
1780 Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
1781 Arg = CtorCGF.Builder.CreateElementBitCast(
1782 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1783 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1784 /*IsInitializer=*/true);
1785 ArgVal = CtorCGF.EmitLoadOfScalar(
1786 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1787 CGM.getContext().VoidPtrTy, Dst.getLocation());
1788 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1789 CtorCGF.FinishFunction();
1790 Ctor = Fn;
1792 if (VD->getType().isDestructedType() != QualType::DK_none) {
1793 // Generate function that emits destructor call for the threadprivate copy
1794 // of the variable VD
1795 CodeGenFunction DtorCGF(CGM);
1796 FunctionArgList Args;
1797 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1798 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1799 ImplicitParamDecl::Other);
1800 Args.push_back(&Dst);
1802 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1803 CGM.getContext().VoidTy, Args);
1804 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1805 std::string Name = getName({"__kmpc_global_dtor_", ""});
1806 llvm::Function *Fn =
1807 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1808 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1809 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1810 Loc, Loc);
1811 // Create a scope with an artificial location for the body of this function.
1812 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1813 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1814 DtorCGF.GetAddrOfLocalVar(&Dst),
1815 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1816 DtorCGF.emitDestroy(
1817 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1818 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1819 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1820 DtorCGF.FinishFunction();
1821 Dtor = Fn;
1823 // Do not emit init function if it is not required.
1824 if (!Ctor && !Dtor)
1825 return nullptr;
1827 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1828 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1829 /*isVarArg=*/false)
1830 ->getPointerTo();
1831 // Copy constructor for the threadprivate variable. This parameter is
1832 // reserved by the runtime, which currently requires it to always be NULL;
1833 // otherwise it fires an assertion.
1834 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1835 if (Ctor == nullptr) {
1836 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1837 /*isVarArg=*/false)
1838 ->getPointerTo();
1839 Ctor = llvm::Constant::getNullValue(CtorTy);
1841 if (Dtor == nullptr) {
1842 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1843 /*isVarArg=*/false)
1844 ->getPointerTo();
1845 Dtor = llvm::Constant::getNullValue(DtorTy);
1847 if (!CGF) {
1848 auto *InitFunctionTy =
1849 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1850 std::string Name = getName({"__omp_threadprivate_init_", ""});
1851 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1852 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1853 CodeGenFunction InitCGF(CGM);
1854 FunctionArgList ArgList;
1855 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1856 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1857 Loc, Loc);
1858 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1859 InitCGF.FinishFunction();
1860 return InitFunction;
1862 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1864 return nullptr;
1867 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1868 llvm::GlobalVariable *Addr,
1869 bool PerformInit) {
1870 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1871 !CGM.getLangOpts().OpenMPIsDevice)
1872 return false;
1873 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1874 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1875 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1876 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1877 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1878 HasRequiresUnifiedSharedMemory))
1879 return CGM.getLangOpts().OpenMPIsDevice;
1880 VD = VD->getDefinition(CGM.getContext());
1881 assert(VD && "Unknown VarDecl");
1883 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1884 return CGM.getLangOpts().OpenMPIsDevice;
1886 QualType ASTTy = VD->getType();
1887 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1889 // Produce the unique prefix to identify the new target regions. We use
1890 // the source location of the variable declaration, which we know does not
1891 // conflict with any target region.
1892 llvm::TargetRegionEntryInfo EntryInfo =
1893 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
1894 SmallString<128> Buffer, Out;
1895 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1897 const Expr *Init = VD->getAnyInitializer();
1898 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1899 llvm::Constant *Ctor;
1900 llvm::Constant *ID;
1901 if (CGM.getLangOpts().OpenMPIsDevice) {
1902 // Generate function that re-emits the declaration's initializer into
1903 // the threadprivate copy of the variable VD
1904 CodeGenFunction CtorCGF(CGM);
1906 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1907 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1908 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1909 FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1910 llvm::GlobalValue::WeakODRLinkage);
1911 Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1912 if (CGM.getTriple().isAMDGCN())
1913 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1914 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1915 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1916 FunctionArgList(), Loc, Loc);
1917 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1918 llvm::Constant *AddrInAS0 = Addr;
1919 if (Addr->getAddressSpace() != 0)
1920 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1921 Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1922 CtorCGF.EmitAnyExprToMem(Init,
1923 Address(AddrInAS0, Addr->getValueType(),
1924 CGM.getContext().getDeclAlign(VD)),
1925 Init->getType().getQualifiers(),
1926 /*IsInitializer=*/true);
1927 CtorCGF.FinishFunction();
1928 Ctor = Fn;
1929 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1930 } else {
1931 Ctor = new llvm::GlobalVariable(
1932 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1933 llvm::GlobalValue::PrivateLinkage,
1934 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1935 ID = Ctor;
1938 // Register the information for the entry associated with the constructor.
1939 Out.clear();
1940 auto CtorEntryInfo = EntryInfo;
1941 CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1942 OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1943 CtorEntryInfo, Ctor, ID,
1944 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
1946 if (VD->getType().isDestructedType() != QualType::DK_none) {
1947 llvm::Constant *Dtor;
1948 llvm::Constant *ID;
1949 if (CGM.getLangOpts().OpenMPIsDevice) {
1950 // Generate function that emits destructor call for the threadprivate
1951 // copy of the variable VD
1952 CodeGenFunction DtorCGF(CGM);
1954 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1955 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1956 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1957 FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1958 llvm::GlobalValue::WeakODRLinkage);
1959 Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1960 if (CGM.getTriple().isAMDGCN())
1961 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1962 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1963 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1964 FunctionArgList(), Loc, Loc);
1965 // Create a scope with an artificial location for the body of this
1966 // function.
1967 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1968 llvm::Constant *AddrInAS0 = Addr;
1969 if (Addr->getAddressSpace() != 0)
1970 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1971 Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1972 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1973 CGM.getContext().getDeclAlign(VD)),
1974 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1975 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1976 DtorCGF.FinishFunction();
1977 Dtor = Fn;
1978 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1979 } else {
1980 Dtor = new llvm::GlobalVariable(
1981 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1982 llvm::GlobalValue::PrivateLinkage,
1983 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1984 ID = Dtor;
1986 // Register the information for the entry associated with the destructor.
1987 Out.clear();
1988 auto DtorEntryInfo = EntryInfo;
1989 DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1990 OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1991 DtorEntryInfo, Dtor, ID,
1992 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
1994 return CGM.getLangOpts().OpenMPIsDevice;
1997 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1998 QualType VarType,
1999 StringRef Name) {
2000 std::string Suffix = getName({"artificial", ""});
2001 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2002 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
2003 VarLVType, Twine(Name).concat(Suffix).str());
2004 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2005 CGM.getTarget().isTLSSupported()) {
2006 GAddr->setThreadLocal(/*Val=*/true);
2007 return Address(GAddr, GAddr->getValueType(),
2008 CGM.getContext().getTypeAlignInChars(VarType));
2010 std::string CacheSuffix = getName({"cache", ""});
2011 llvm::Value *Args[] = {
2012 emitUpdateLocation(CGF, SourceLocation()),
2013 getThreadID(CGF, SourceLocation()),
2014 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2015 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2016 /*isSigned=*/false),
2017 OMPBuilder.getOrCreateInternalVariable(
2018 CGM.VoidPtrPtrTy,
2019 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
2020 return Address(
2021 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2022 CGF.EmitRuntimeCall(
2023 OMPBuilder.getOrCreateRuntimeFunction(
2024 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2025 Args),
2026 VarLVType->getPointerTo(/*AddrSpace=*/0)),
2027 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2030 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2031 const RegionCodeGenTy &ThenGen,
2032 const RegionCodeGenTy &ElseGen) {
2033 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2035 // If the condition constant folds and can be elided, try to avoid emitting
2036 // the condition and the dead arm of the if/else.
2037 bool CondConstant;
2038 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2039 if (CondConstant)
2040 ThenGen(CGF);
2041 else
2042 ElseGen(CGF);
2043 return;
2046 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2047 // emit the conditional branch.
2048 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2049 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2050 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2051 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2053 // Emit the 'then' code.
2054 CGF.EmitBlock(ThenBlock);
2055 ThenGen(CGF);
2056 CGF.EmitBranch(ContBlock);
2057 // Emit the 'else' code if present.
2058 // There is no need to emit line number for unconditional branch.
2059 (void)ApplyDebugLocation::CreateEmpty(CGF);
2060 CGF.EmitBlock(ElseBlock);
2061 ElseGen(CGF);
2062 // There is no need to emit line number for unconditional branch.
2063 (void)ApplyDebugLocation::CreateEmpty(CGF);
2064 CGF.EmitBranch(ContBlock);
2065 // Emit the continuation block for code after the if.
2066 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
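// When the condition does not fold, the emitted control flow is a standard
// diamond (sketch):
//
//     br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then:
//     ; ThenGen(CGF)
//     br label %omp_if.end
//   omp_if.else:
//     ; ElseGen(CGF)
//     br label %omp_if.end
//   omp_if.end: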
2069 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2070 llvm::Function *OutlinedFn,
2071 ArrayRef<llvm::Value *> CapturedVars,
2072 const Expr *IfCond,
2073 llvm::Value *NumThreads) {
2074 if (!CGF.HaveInsertPoint())
2075 return;
2076 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2077 auto &M = CGM.getModule();
2078 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2079 this](CodeGenFunction &CGF, PrePostActionTy &) {
2080 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2081 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082 llvm::Value *Args[] = {
2083 RTLoc,
2084 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2085 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2086 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2087 RealArgs.append(std::begin(Args), std::end(Args));
2088 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2090 llvm::FunctionCallee RTLFn =
2091 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2092 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2094 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2095 this](CodeGenFunction &CGF, PrePostActionTy &) {
2096 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2097 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2098 // Build calls:
2099 // __kmpc_serialized_parallel(&Loc, GTid);
2100 llvm::Value *Args[] = {RTLoc, ThreadID};
2101 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2102 M, OMPRTL___kmpc_serialized_parallel),
2103 Args);
2105 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2106 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2107 Address ZeroAddrBound =
2108 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2109 /*Name=*/".bound.zero.addr");
2110 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2111 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2112 // ThreadId for serialized parallels is 0.
2113 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2114 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2115 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2117 // Ensure we do not inline the function. This is trivially true for the ones
2118 // passed to __kmpc_fork_call, but the ones called in serialized regions
2119 // could be inlined. This is not perfect, but it is closer to the invariant
2120 // we want, namely, that every data environment starts with a new function.
2121 // TODO: We should pass the if condition to the runtime function and do the
2122 // handling there. Much cleaner code.
2123 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2124 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2125 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2127 // __kmpc_end_serialized_parallel(&Loc, GTid);
2128 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2129 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2130 M, OMPRTL___kmpc_end_serialized_parallel),
2131 EndArgs);
2133 if (IfCond) {
2134 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2135 } else {
2136 RegionCodeGenTy ThenRCG(ThenGen);
2137 ThenRCG(CGF);
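// With two captured variables, the non-serialized path boils down to one
// variadic runtime call (sketch; @.omp_outlined., %var1 and %var2 are
// hypothetical names):
//
//   call void (ptr, i32, ptr, ...)
//       @__kmpc_fork_call(ptr @loc, i32 2, ptr @.omp_outlined.,
//                         ptr %var1, ptr %var2)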
2141 // If we're inside an (outlined) parallel region, use the region info's
2142 // thread-ID variable (it is passed as the first argument of the outlined
2143 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2144 // region but in a regular serial code region, get the thread ID by calling
2145 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary,
2146 // and return the address of that temporary.
2147 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2148 SourceLocation Loc) {
2149 if (auto *OMPRegionInfo =
2150 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2151 if (OMPRegionInfo->getThreadIDVariable())
2152 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2154 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2155 QualType Int32Ty =
2156 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2157 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2158 CGF.EmitStoreOfScalar(ThreadID,
2159 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2161 return ThreadIDTemp;
2164 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2165 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2166 std::string Name = getName({Prefix, "var"});
2167 llvm::GlobalVariable *G = OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2168 llvm::Align PtrAlign = OMPBuilder.M.getDataLayout().getPointerABIAlignment(G->getAddressSpace());
2169 if (PtrAlign > llvm::Align(G->getAlignment()))
2170 G->setAlignment(PtrAlign);
2171 return G;
2174 namespace {
2175 /// Common pre(post)-action for different OpenMP constructs.
2176 class CommonActionTy final : public PrePostActionTy {
2177 llvm::FunctionCallee EnterCallee;
2178 ArrayRef<llvm::Value *> EnterArgs;
2179 llvm::FunctionCallee ExitCallee;
2180 ArrayRef<llvm::Value *> ExitArgs;
2181 bool Conditional;
2182 llvm::BasicBlock *ContBlock = nullptr;
2184 public:
2185 CommonActionTy(llvm::FunctionCallee EnterCallee,
2186 ArrayRef<llvm::Value *> EnterArgs,
2187 llvm::FunctionCallee ExitCallee,
2188 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2189 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2190 ExitArgs(ExitArgs), Conditional(Conditional) {}
2191 void Enter(CodeGenFunction &CGF) override {
2192 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2193 if (Conditional) {
2194 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2195 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2196 ContBlock = CGF.createBasicBlock("omp_if.end");
2197 // Generate the branch (If-stmt)
2198 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2199 CGF.EmitBlock(ThenBlock);
2202 void Done(CodeGenFunction &CGF) {
2203 // Emit the rest of blocks/branches
2204 CGF.EmitBranch(ContBlock);
2205 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2207 void Exit(CodeGenFunction &CGF) override {
2208 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2211 } // anonymous namespace
2213 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2214 StringRef CriticalName,
2215 const RegionCodeGenTy &CriticalOpGen,
2216 SourceLocation Loc, const Expr *Hint) {
2217 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2218 // CriticalOpGen();
2219 // __kmpc_end_critical(ident_t *, gtid, Lock);
2220 // Prepare arguments and build a call to __kmpc_critical
2221 if (!CGF.HaveInsertPoint())
2222 return;
2223 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2224 getCriticalRegionLock(CriticalName)};
2225 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2226 std::end(Args));
2227 if (Hint) {
2228 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2229 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2231 CommonActionTy Action(
2232 OMPBuilder.getOrCreateRuntimeFunction(
2233 CGM.getModule(),
2234 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2235 EnterArgs,
2236 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2237 OMPRTL___kmpc_end_critical),
2238 Args);
2239 CriticalOpGen.setAction(Action);
2240 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
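// Net effect for '#pragma omp critical (foo)' without a hint (sketch; the
// lock variable's name follows the getName scheme above):
//
//   call void @__kmpc_critical(ptr @loc, i32 %gtid,
//                              ptr @.gomp_critical_user_foo.var)
//   ; CriticalOpGen(CGF)
//   call void @__kmpc_end_critical(ptr @loc, i32 %gtid,
//                                  ptr @.gomp_critical_user_foo.var)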
2243 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2244 const RegionCodeGenTy &MasterOpGen,
2245 SourceLocation Loc) {
2246 if (!CGF.HaveInsertPoint())
2247 return;
2248 // if(__kmpc_master(ident_t *, gtid)) {
2249 // MasterOpGen();
2250 // __kmpc_end_master(ident_t *, gtid);
2251 // }
2252 // Prepare arguments and build a call to __kmpc_master
2253 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2254 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2255 CGM.getModule(), OMPRTL___kmpc_master),
2256 Args,
2257 OMPBuilder.getOrCreateRuntimeFunction(
2258 CGM.getModule(), OMPRTL___kmpc_end_master),
2259 Args,
2260 /*Conditional=*/true);
2261 MasterOpGen.setAction(Action);
2262 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2263 Action.Done(CGF);
2266 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2267 const RegionCodeGenTy &MaskedOpGen,
2268 SourceLocation Loc, const Expr *Filter) {
2269 if (!CGF.HaveInsertPoint())
2270 return;
2271 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2272 // MaskedOpGen();
2273 // __kmpc_end_masked(ident_t *, gtid);
2274 // }
2275 // Prepare arguments and build a call to __kmpc_masked
2276 llvm::Value *FilterVal = Filter
2277 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2278 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2279 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2280 FilterVal};
2281 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2282 getThreadID(CGF, Loc)};
2283 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2284 CGM.getModule(), OMPRTL___kmpc_masked),
2285 Args,
2286 OMPBuilder.getOrCreateRuntimeFunction(
2287 CGM.getModule(), OMPRTL___kmpc_end_masked),
2288 ArgsEnd,
2289 /*Conditional=*/true);
2290 MaskedOpGen.setAction(Action);
2291 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2292 Action.Done(CGF);
2295 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2296 SourceLocation Loc) {
2297 if (!CGF.HaveInsertPoint())
2298 return;
2299 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2300 OMPBuilder.createTaskyield(CGF.Builder);
2301 } else {
2302 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2303 llvm::Value *Args[] = {
2304 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2305 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2306 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2307 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2308 Args);
2311 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2312 Region->emitUntiedSwitch(CGF);
2315 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2316 const RegionCodeGenTy &TaskgroupOpGen,
2317 SourceLocation Loc) {
2318 if (!CGF.HaveInsertPoint())
2319 return;
2320 // __kmpc_taskgroup(ident_t *, gtid);
2321 // TaskgroupOpGen();
2322 // __kmpc_end_taskgroup(ident_t *, gtid);
2323 // Prepare arguments and build a call to __kmpc_taskgroup
2324 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2325 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2326 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2327 Args,
2328 OMPBuilder.getOrCreateRuntimeFunction(
2329 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2330 Args);
2331 TaskgroupOpGen.setAction(Action);
2332 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2335 /// Given an array of pointers to variables, project the address of a
2336 /// given variable.
2337 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2338 unsigned Index, const VarDecl *Var) {
2339 // Pull out the pointer to the variable.
2340 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2341 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2343 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2344 return Address(
2345 CGF.Builder.CreateBitCast(
2346 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2347 ElemTy, CGF.getContext().getDeclAlign(Var));
2350 static llvm::Value *emitCopyprivateCopyFunction(
2351 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2352 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2353 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2354 SourceLocation Loc) {
2355 ASTContext &C = CGM.getContext();
2356 // void copy_func(void *LHSArg, void *RHSArg);
2357 FunctionArgList Args;
2358 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2359 ImplicitParamDecl::Other);
2360 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2361 ImplicitParamDecl::Other);
2362 Args.push_back(&LHSArg);
2363 Args.push_back(&RHSArg);
2364 const auto &CGFI =
2365 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2366 std::string Name =
2367 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2368 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2369 llvm::GlobalValue::InternalLinkage, Name,
2370 &CGM.getModule());
2371 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2372 Fn->setDoesNotRecurse();
2373 CodeGenFunction CGF(CGM);
2374 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2375 // Dest = (void*[n])(LHSArg);
2376 // Src = (void*[n])(RHSArg);
2377 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2378 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2379 ArgsElemType->getPointerTo()),
2380 ArgsElemType, CGF.getPointerAlign());
2381 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2382 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2383 ArgsElemType->getPointerTo()),
2384 ArgsElemType, CGF.getPointerAlign());
2385 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2386 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2387 // ...
2388 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2389 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2390 const auto *DestVar =
2391 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2392 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2394 const auto *SrcVar =
2395 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2396 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2398 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2399 QualType Type = VD->getType();
2400 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2402 CGF.FinishFunction();
2403 return Fn;
2406 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2407 const RegionCodeGenTy &SingleOpGen,
2408 SourceLocation Loc,
2409 ArrayRef<const Expr *> CopyprivateVars,
2410 ArrayRef<const Expr *> SrcExprs,
2411 ArrayRef<const Expr *> DstExprs,
2412 ArrayRef<const Expr *> AssignmentOps) {
2413 if (!CGF.HaveInsertPoint())
2414 return;
2415 assert(CopyprivateVars.size() == SrcExprs.size() &&
2416 CopyprivateVars.size() == DstExprs.size() &&
2417 CopyprivateVars.size() == AssignmentOps.size());
2418 ASTContext &C = CGM.getContext();
2419 // int32 did_it = 0;
2420 // if(__kmpc_single(ident_t *, gtid)) {
2421 // SingleOpGen();
2422 // __kmpc_end_single(ident_t *, gtid);
2423 // did_it = 1;
2424 // }
2425 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2426 // <copy_func>, did_it);
2428 Address DidIt = Address::invalid();
2429 if (!CopyprivateVars.empty()) {
2430 // int32 did_it = 0;
2431 QualType KmpInt32Ty =
2432 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2433 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2434 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2436 // Prepare arguments and build a call to __kmpc_single
2437 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2438 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2439 CGM.getModule(), OMPRTL___kmpc_single),
2440 Args,
2441 OMPBuilder.getOrCreateRuntimeFunction(
2442 CGM.getModule(), OMPRTL___kmpc_end_single),
2443 Args,
2444 /*Conditional=*/true);
2445 SingleOpGen.setAction(Action);
2446 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2447 if (DidIt.isValid()) {
2448 // did_it = 1;
2449 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2451 Action.Done(CGF);
2452 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2453 // <copy_func>, did_it);
2454 if (DidIt.isValid()) {
2455 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2456 QualType CopyprivateArrayTy = C.getConstantArrayType(
2457 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2458 /*IndexTypeQuals=*/0);
2459 // Create a list of all private variables for copyprivate.
2460 Address CopyprivateList =
2461 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2462 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2463 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2464 CGF.Builder.CreateStore(
2465 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2466 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2467 CGF.VoidPtrTy),
2468 Elem);
2470 // Build a function that copies private values from the single region to all
2471 // other threads in the corresponding parallel region.
2472 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2473 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2474 SrcExprs, DstExprs, AssignmentOps, Loc);
2475 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2476 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2477 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2478 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2479 llvm::Value *Args[] = {
2480 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2481 getThreadID(CGF, Loc), // i32 <gtid>
2482 BufSize, // size_t <buf_size>
2483 CL.getPointer(), // void *<copyprivate list>
2484 CpyFn, // void (*) (void *, void *) <copy_func>
2485 DidItVal // i32 did_it
2487 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2488 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2489 Args);
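// Reconstructed prototype for the final call (argument roles as annotated
// above):
//
//   void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t buf_size,
//                           void *cpy_data,
//                           void (*cpy_func)(void *, void *),
//                           kmp_int32 did_it);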
2493 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2494 const RegionCodeGenTy &OrderedOpGen,
2495 SourceLocation Loc, bool IsThreads) {
2496 if (!CGF.HaveInsertPoint())
2497 return;
2498 // __kmpc_ordered(ident_t *, gtid);
2499 // OrderedOpGen();
2500 // __kmpc_end_ordered(ident_t *, gtid);
2501 // Prepare arguments and build a call to __kmpc_ordered
2502 if (IsThreads) {
2503 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2504 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2505 CGM.getModule(), OMPRTL___kmpc_ordered),
2506 Args,
2507 OMPBuilder.getOrCreateRuntimeFunction(
2508 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2509 Args);
2510 OrderedOpGen.setAction(Action);
2511 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2512 return;
2514 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2517 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2518 unsigned Flags;
2519 if (Kind == OMPD_for)
2520 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2521 else if (Kind == OMPD_sections)
2522 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2523 else if (Kind == OMPD_single)
2524 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2525 else if (Kind == OMPD_barrier)
2526 Flags = OMP_IDENT_BARRIER_EXPL;
2527 else
2528 Flags = OMP_IDENT_BARRIER_IMPL;
2529 return Flags;
2532 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2533 CodeGenFunction &CGF, const OMPLoopDirective &S,
2534 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2535 // Check if the loop directive is actually a doacross loop directive. In this
2536 // case choose schedule(static, 1).
2537 if (llvm::any_of(
2538 S.getClausesOfKind<OMPOrderedClause>(),
2539 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2540 ScheduleKind = OMPC_SCHEDULE_static;
2541 // Chunk size is 1 in this case.
2542 llvm::APInt ChunkSize(32, 1);
2543 ChunkExpr = IntegerLiteral::Create(
2544 CGF.getContext(), ChunkSize,
2545 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2546 SourceLocation());
2550 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2551 OpenMPDirectiveKind Kind, bool EmitChecks,
2552 bool ForceSimpleCall) {
2553 // Check if we should use the OMPBuilder
2554 auto *OMPRegionInfo =
2555 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2556 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2557 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2558 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2559 return;
2562 if (!CGF.HaveInsertPoint())
2563 return;
2566 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2567 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2568 // thread_id);
2569 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2570 getThreadID(CGF, Loc)};
2571 if (OMPRegionInfo) {
2572 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2573 llvm::Value *Result = CGF.EmitRuntimeCall(
2574 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2575 OMPRTL___kmpc_cancel_barrier),
2576 Args);
2577 if (EmitChecks) {
2578 // if (__kmpc_cancel_barrier()) {
2579 // exit from construct;
2580 // }
2581 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2582 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2583 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2584 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2585 CGF.EmitBlock(ExitBB);
2586 // exit from construct;
2587 CodeGenFunction::JumpDest CancelDestination =
2588 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2589 CGF.EmitBranchThroughCleanup(CancelDestination);
2590 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2592 return;
2595 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2596 CGM.getModule(), OMPRTL___kmpc_barrier),
2597 Args);
2600 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2601 Expr *ME, bool IsFatal) {
2602 llvm::Value *MVL =
2603 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2604 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2605 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2606 // *message)
2607 llvm::Value *Args[] = {
2608 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
2609 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2610 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2611 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2612 CGM.getModule(), OMPRTL___kmpc_error),
2613 Args);
2616 /// Map the OpenMP loop schedule to the runtime enumeration.
2617 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2618 bool Chunked, bool Ordered) {
2619 switch (ScheduleKind) {
2620 case OMPC_SCHEDULE_static:
2621 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2622 : (Ordered ? OMP_ord_static : OMP_sch_static);
2623 case OMPC_SCHEDULE_dynamic:
2624 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2625 case OMPC_SCHEDULE_guided:
2626 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2627 case OMPC_SCHEDULE_runtime:
2628 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2629 case OMPC_SCHEDULE_auto:
2630 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2631 case OMPC_SCHEDULE_unknown:
2632 assert(!Chunked && "chunk was specified but schedule kind not known");
2633 return Ordered ? OMP_ord_static : OMP_sch_static;
2635 llvm_unreachable("Unexpected runtime schedule");
2638 /// Map the OpenMP distribute schedule to the runtime enumeration.
2639 static OpenMPSchedType
2640 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2641 // Only static is allowed for dist_schedule.
2642 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2645 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2646 bool Chunked) const {
2647 OpenMPSchedType Schedule =
2648 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2649 return Schedule == OMP_sch_static;
2652 bool CGOpenMPRuntime::isStaticNonchunked(
2653 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2654 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2655 return Schedule == OMP_dist_sch_static;
2658 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2659 bool Chunked) const {
2660 OpenMPSchedType Schedule =
2661 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2662 return Schedule == OMP_sch_static_chunked;
2665 bool CGOpenMPRuntime::isStaticChunked(
2666 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2667 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2668 return Schedule == OMP_dist_sch_static_chunked;
2671 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2672 OpenMPSchedType Schedule =
2673 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2674 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2675 return Schedule != OMP_sch_static;
2678 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2679 OpenMPScheduleClauseModifier M1,
2680 OpenMPScheduleClauseModifier M2) {
2681 int Modifier = 0;
2682 switch (M1) {
2683 case OMPC_SCHEDULE_MODIFIER_monotonic:
2684 Modifier = OMP_sch_modifier_monotonic;
2685 break;
2686 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2687 Modifier = OMP_sch_modifier_nonmonotonic;
2688 break;
2689 case OMPC_SCHEDULE_MODIFIER_simd:
2690 if (Schedule == OMP_sch_static_chunked)
2691 Schedule = OMP_sch_static_balanced_chunked;
2692 break;
2693 case OMPC_SCHEDULE_MODIFIER_last:
2694 case OMPC_SCHEDULE_MODIFIER_unknown:
2695 break;
2697 switch (M2) {
2698 case OMPC_SCHEDULE_MODIFIER_monotonic:
2699 Modifier = OMP_sch_modifier_monotonic;
2700 break;
2701 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2702 Modifier = OMP_sch_modifier_nonmonotonic;
2703 break;
2704 case OMPC_SCHEDULE_MODIFIER_simd:
2705 if (Schedule == OMP_sch_static_chunked)
2706 Schedule = OMP_sch_static_balanced_chunked;
2707 break;
2708 case OMPC_SCHEDULE_MODIFIER_last:
2709 case OMPC_SCHEDULE_MODIFIER_unknown:
2710 break;
2712 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2713 // If the static schedule kind is specified or if the ordered clause is
2714 // specified, and if the nonmonotonic modifier is not specified, the effect is
2715 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2716 // modifier is specified, the effect is as if the nonmonotonic modifier is
2717 // specified.
2718 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2719 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2720 Schedule == OMP_sch_static_balanced_chunked ||
2721 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2722 Schedule == OMP_dist_sch_static_chunked ||
2723 Schedule == OMP_dist_sch_static))
2724 Modifier = OMP_sch_modifier_nonmonotonic;
2726 return Schedule | Modifier;
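// Worked example: 'schedule(nonmonotonic: dynamic)' maps Schedule to
// OMP_sch_dynamic_chunked and Modifier to OMP_sch_modifier_nonmonotonic, and
// the runtime receives the two OR-ed together. With the usual libomp
// encodings (kmp_sch_dynamic_chunked == 35, nonmonotonic modifier == 1 << 30;
// the exact values are owned by the runtime's kmp.h), that is 35 | (1 << 30).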
2729 void CGOpenMPRuntime::emitForDispatchInit(
2730 CodeGenFunction &CGF, SourceLocation Loc,
2731 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2732 bool Ordered, const DispatchRTInput &DispatchValues) {
2733 if (!CGF.HaveInsertPoint())
2734 return;
2735 OpenMPSchedType Schedule = getRuntimeSchedule(
2736 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2737 assert(Ordered ||
2738 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2739 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2740 Schedule != OMP_sch_static_balanced_chunked));
2741 // Call __kmpc_dispatch_init(
2742 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2743 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2744 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2746 // If the Chunk was not specified in the clause, use the default value 1.
2747 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2748 : CGF.Builder.getIntN(IVSize, 1);
2749 llvm::Value *Args[] = {
2750 emitUpdateLocation(CGF, Loc),
2751 getThreadID(CGF, Loc),
2752 CGF.Builder.getInt32(addMonoNonMonoModifier(
2753 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2754 DispatchValues.LB, // Lower
2755 DispatchValues.UB, // Upper
2756 CGF.Builder.getIntN(IVSize, 1), // Stride
2757 Chunk // Chunk
2759 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2762 static void emitForStaticInitCall(
2763 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2764 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2765 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2766 const CGOpenMPRuntime::StaticRTInput &Values) {
2767 if (!CGF.HaveInsertPoint())
2768 return;
2770 assert(!Values.Ordered);
2771 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2772 Schedule == OMP_sch_static_balanced_chunked ||
2773 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2774 Schedule == OMP_dist_sch_static ||
2775 Schedule == OMP_dist_sch_static_chunked);
2777 // Call __kmpc_for_static_init(
2778 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2779 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2780 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2781 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2782 llvm::Value *Chunk = Values.Chunk;
2783 if (Chunk == nullptr) {
2784 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2785 Schedule == OMP_dist_sch_static) &&
2786 "expected static non-chunked schedule");
2787 // If the Chunk was not specified in the clause, use the default value 1.
2788 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2789 } else {
2790 assert((Schedule == OMP_sch_static_chunked ||
2791 Schedule == OMP_sch_static_balanced_chunked ||
2792 Schedule == OMP_ord_static_chunked ||
2793 Schedule == OMP_dist_sch_static_chunked) &&
2794 "expected static chunked schedule");
2796 llvm::Value *Args[] = {
2797 UpdateLocation,
2798 ThreadId,
2799 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2800 M2)), // Schedule type
2801 Values.IL.getPointer(), // &isLastIter
2802 Values.LB.getPointer(), // &LB
2803 Values.UB.getPointer(), // &UB
2804 Values.ST.getPointer(), // &Stride
2805 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2806 Chunk // Chunk
2808 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2811 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2812 SourceLocation Loc,
2813 OpenMPDirectiveKind DKind,
2814 const OpenMPScheduleTy &ScheduleKind,
2815 const StaticRTInput &Values) {
2816 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2817 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2818 assert(isOpenMPWorksharingDirective(DKind) &&
2819 "Expected loop-based or sections-based directive.");
2820 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2821 isOpenMPLoopDirective(DKind)
2822 ? OMP_IDENT_WORK_LOOP
2823 : OMP_IDENT_WORK_SECTIONS);
2824 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2825 llvm::FunctionCallee StaticInitFunction =
2826 createForStaticInitFunction(Values.IVSize, Values.IVSigned, /*IsGPUDistribute=*/false);
2827 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2828 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2829 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2832 void CGOpenMPRuntime::emitDistributeStaticInit(
2833 CodeGenFunction &CGF, SourceLocation Loc,
2834 OpenMPDistScheduleClauseKind SchedKind,
2835 const CGOpenMPRuntime::StaticRTInput &Values) {
2836 OpenMPSchedType ScheduleNum =
2837 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2838 llvm::Value *UpdatedLocation =
2839 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2840 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2841 llvm::FunctionCallee StaticInitFunction;
2842 bool IsGPUDistribute =
2843 CGM.getLangOpts().OpenMPIsDevice &&
2844 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2845 StaticInitFunction = createForStaticInitFunction(
2846 Values.IVSize, Values.IVSigned, IsGPUDistribute);
2848 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2849 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2850 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2851 }
2853 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2854 SourceLocation Loc,
2855 OpenMPDirectiveKind DKind) {
2856 if (!CGF.HaveInsertPoint())
2857 return;
2858 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2859 llvm::Value *Args[] = {
2860 emitUpdateLocation(CGF, Loc,
2861 isOpenMPDistributeDirective(DKind)
2862 ? OMP_IDENT_WORK_DISTRIBUTE
2863 : isOpenMPLoopDirective(DKind)
2864 ? OMP_IDENT_WORK_LOOP
2865 : OMP_IDENT_WORK_SECTIONS),
2866 getThreadID(CGF, Loc)};
2867 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2868 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2869 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2870 CGF.EmitRuntimeCall(
2871 OMPBuilder.getOrCreateRuntimeFunction(
2872 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2873 Args);
2874 else
2875 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2876 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2877 Args);
2878 }
2880 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2881 SourceLocation Loc,
2882 unsigned IVSize,
2883 bool IVSigned) {
2884 if (!CGF.HaveInsertPoint())
2885 return;
2886 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2887 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2888 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2889 }
2891 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2892 SourceLocation Loc, unsigned IVSize,
2893 bool IVSigned, Address IL,
2894 Address LB, Address UB,
2895 Address ST) {
2896 // Call __kmpc_dispatch_next(
2897 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2898 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2899 // kmp_int[32|64] *p_stride);
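// Together with the dispatch-init call emitted above, the resulting
// pattern for a dynamically scheduled loop is roughly (a C-like sketch,
// not verbatim IR):
//   __kmpc_dispatch_init_4(&loc, tid, sched, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st))
//     for (i = lb; i <= ub; i += st)
//       <loop body>;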
2900 llvm::Value *Args[] = {
2901 emitUpdateLocation(CGF, Loc),
2902 getThreadID(CGF, Loc),
2903 IL.getPointer(), // &isLastIter
2904 LB.getPointer(), // &Lower
2905 UB.getPointer(), // &Upper
2906 ST.getPointer() // &Stride
2907 };
2908 llvm::Value *Call =
2909 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2910 return CGF.EmitScalarConversion(
2911 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2912 CGF.getContext().BoolTy, Loc);
2913 }
2915 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2916 llvm::Value *NumThreads,
2917 SourceLocation Loc) {
2918 if (!CGF.HaveInsertPoint())
2919 return;
2920 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2921 llvm::Value *Args[] = {
2922 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2923 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2924 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2925 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2926 Args);
2927 }
2929 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2930 ProcBindKind ProcBind,
2931 SourceLocation Loc) {
2932 if (!CGF.HaveInsertPoint())
2933 return;
2934 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2935 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2936 llvm::Value *Args[] = {
2937 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2938 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2939 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2940 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2941 Args);
2942 }
2944 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2945 SourceLocation Loc, llvm::AtomicOrdering AO) {
2946 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2947 OMPBuilder.createFlush(CGF.Builder);
2948 } else {
2949 if (!CGF.HaveInsertPoint())
2950 return;
2951 // Build call void __kmpc_flush(ident_t *loc)
2952 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2953 CGM.getModule(), OMPRTL___kmpc_flush),
2954 emitUpdateLocation(CGF, Loc));
2955 }
2956 }
2958 namespace {
2959 /// Indexes of fields for type kmp_task_t.
2960 enum KmpTaskTFields {
2961 /// List of shared variables.
2962 KmpTaskTShareds,
2963 /// Task routine.
2964 KmpTaskTRoutine,
2965 /// Partition id for the untied tasks.
2966 KmpTaskTPartId,
2968 /// Function that calls destructors for private variables.
2968 Data1,
2969 /// Task priority.
2970 Data2,
2971 /// (Taskloops only) Lower bound.
2972 KmpTaskTLowerBound,
2973 /// (Taskloops only) Upper bound.
2974 KmpTaskTUpperBound,
2975 /// (Taskloops only) Stride.
2976 KmpTaskTStride,
2977 /// (Taskloops only) Is last iteration flag.
2978 KmpTaskTLastIter,
2979 /// (Taskloops only) Reduction data.
2980 KmpTaskTReductions,
2981 };
2982 } // anonymous namespace
2984 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2985 // If we are in simd mode or there are no entries, we don't need to do
2986 // anything.
2987 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2988 return;
2990 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2991 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2992 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2993 SourceLocation Loc;
2994 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2995 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2996 E = CGM.getContext().getSourceManager().fileinfo_end();
2997 I != E; ++I) {
2998 if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
2999 I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
3000 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3001 I->getFirst(), EntryInfo.Line, 1);
3002 break;
3003 }
3004 }
3005 }
3006 switch (Kind) {
3007 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
3008 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3009 DiagnosticsEngine::Error, "Offloading entry for target region in "
3010 "%0 is incorrect: either the "
3011 "address or the ID is invalid.");
3012 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
3013 } break;
3014 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
3015 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3016 DiagnosticsEngine::Error, "Offloading entry for declare target "
3017 "variable %0 is incorrect: the "
3018 "address is invalid.");
3019 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
3020 } break;
3021 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
3022 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3023 DiagnosticsEngine::Error,
3024 "Offloading entry for declare target variable is incorrect: the "
3025 "address is invalid.");
3026 CGM.getDiags().Report(DiagID);
3027 } break;
3028 }
3029 };
3031 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
3032 }
3034 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3035 if (!KmpRoutineEntryPtrTy) {
3036 // Build the typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
3037 ASTContext &C = CGM.getContext();
3038 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3039 FunctionProtoType::ExtProtoInfo EPI;
3040 KmpRoutineEntryPtrQTy = C.getPointerType(
3041 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3042 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3043 }
3044 }
3046 namespace {
3047 struct PrivateHelpersTy {
3048 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3049 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3050 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3051 PrivateElemInit(PrivateElemInit) {}
3052 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3053 const Expr *OriginalRef = nullptr;
3054 const VarDecl *Original = nullptr;
3055 const VarDecl *PrivateCopy = nullptr;
3056 const VarDecl *PrivateElemInit = nullptr;
3057 bool isLocalPrivate() const {
3058 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3059 }
3060 };
3061 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3062 } // anonymous namespace
3064 static bool isAllocatableDecl(const VarDecl *VD) {
3065 const VarDecl *CVD = VD->getCanonicalDecl();
3066 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3067 return false;
3068 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3069 // The default allocator with no allocator expression means default allocation.
3070 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3071 !AA->getAllocator());
3072 }
3074 static RecordDecl *
3075 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3076 if (!Privates.empty()) {
3077 ASTContext &C = CGM.getContext();
3078 // Build struct .kmp_privates.t {
3079 // /* private vars */
3080 // };
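// For example (illustrative only): '#pragma omp task firstprivate(d, i)'
// with 'double d; int i;' produces roughly
//   struct .kmp_privates.t { double d; int i; };
// the caller has already sorted Privates by decreasing alignment.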
3081 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3082 RD->startDefinition();
3083 for (const auto &Pair : Privates) {
3084 const VarDecl *VD = Pair.second.Original;
3085 QualType Type = VD->getType().getNonReferenceType();
3086 // If the private variable is a local variable with lvalue ref type,
3087 // allocate the pointer instead of the pointee type.
3088 if (Pair.second.isLocalPrivate()) {
3089 if (VD->getType()->isLValueReferenceType())
3090 Type = C.getPointerType(Type);
3091 if (isAllocatableDecl(VD))
3092 Type = C.getPointerType(Type);
3094 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3095 if (VD->hasAttrs()) {
3096 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3097 E(VD->getAttrs().end());
3098 I != E; ++I)
3099 FD->addAttr(*I);
3100 }
3101 }
3102 RD->completeDefinition();
3103 return RD;
3105 return nullptr;
3106 }
3108 static RecordDecl *
3109 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3110 QualType KmpInt32Ty,
3111 QualType KmpRoutineEntryPointerQTy) {
3112 ASTContext &C = CGM.getContext();
3113 // Build struct kmp_task_t {
3114 // void * shareds;
3115 // kmp_routine_entry_t routine;
3116 // kmp_int32 part_id;
3117 // kmp_cmplrdata_t data1;
3118 // kmp_cmplrdata_t data2;
3119 // For taskloops, additional fields:
3120 // kmp_uint64 lb;
3121 // kmp_uint64 ub;
3122 // kmp_int64 st;
3123 // kmp_int32 liter;
3124 // void * reductions;
3125 // };
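// For reference, a rough C sketch of the matching runtime union (the
// authoritative definition lives in openmp/runtime/src/kmp.h):
//   typedef union kmp_cmplrdata {
//     kmp_int32 priority;
//     kmp_routine_entry_t destructors;
//   } kmp_cmplrdata_t;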
3126 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3127 UD->startDefinition();
3128 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3129 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3130 UD->completeDefinition();
3131 QualType KmpCmplrdataTy = C.getRecordType(UD);
3132 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3133 RD->startDefinition();
3134 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3135 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3136 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3137 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3138 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3139 if (isOpenMPTaskLoopDirective(Kind)) {
3140 QualType KmpUInt64Ty =
3141 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3142 QualType KmpInt64Ty =
3143 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3144 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3145 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3146 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3147 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3148 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3149 }
3150 RD->completeDefinition();
3151 return RD;
3152 }
3154 static RecordDecl *
3155 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3156 ArrayRef<PrivateDataTy> Privates) {
3157 ASTContext &C = CGM.getContext();
3158 // Build struct kmp_task_t_with_privates {
3159 // kmp_task_t task_data;
3160 // .kmp_privates_t. privates;
3161 // };
3162 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3163 RD->startDefinition();
3164 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3165 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3166 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3167 RD->completeDefinition();
3168 return RD;
3169 }
3171 /// Emit a proxy function which accepts kmp_task_t as the second
3172 /// argument.
3173 /// \code
3174 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3175 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3176 /// For taskloops:
3177 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3178 /// tt->reductions, tt->shareds);
3179 /// return 0;
3180 /// }
3181 /// \endcode
3182 static llvm::Function *
3183 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3184 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3185 QualType KmpTaskTWithPrivatesPtrQTy,
3186 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3187 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3188 llvm::Value *TaskPrivatesMap) {
3189 ASTContext &C = CGM.getContext();
3190 FunctionArgList Args;
3191 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3192 ImplicitParamDecl::Other);
3193 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3194 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3195 ImplicitParamDecl::Other);
3196 Args.push_back(&GtidArg);
3197 Args.push_back(&TaskTypeArg);
3198 const auto &TaskEntryFnInfo =
3199 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3200 llvm::FunctionType *TaskEntryTy =
3201 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3202 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3203 auto *TaskEntry = llvm::Function::Create(
3204 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3205 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3206 TaskEntry->setDoesNotRecurse();
3207 CodeGenFunction CGF(CGM);
3208 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3209 Loc, Loc);
3211 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3212 // tt,
3213 // For taskloops:
3214 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3215 // tt->task_data.shareds);
3216 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3217 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3218 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3219 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3220 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3221 const auto *KmpTaskTWithPrivatesQTyRD =
3222 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3223 LValue Base =
3224 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3225 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3226 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3227 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3228 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3230 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3231 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3232 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3233 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3234 CGF.ConvertTypeForMem(SharedsPtrTy));
3236 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3237 llvm::Value *PrivatesParam;
3238 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3239 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3240 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3241 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3242 } else {
3243 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3244 }
3246 llvm::Value *CommonArgs[] = {
3247 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3248 CGF.Builder
3249 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3250 CGF.VoidPtrTy, CGF.Int8Ty)
3251 .getPointer()};
3252 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3253 std::end(CommonArgs));
3254 if (isOpenMPTaskLoopDirective(Kind)) {
3255 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3256 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3257 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3258 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3259 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3260 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3261 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3262 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3263 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3264 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3265 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3266 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3267 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3268 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3269 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3270 CallArgs.push_back(LBParam);
3271 CallArgs.push_back(UBParam);
3272 CallArgs.push_back(StParam);
3273 CallArgs.push_back(LIParam);
3274 CallArgs.push_back(RParam);
3275 }
3276 CallArgs.push_back(SharedsParam);
3278 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3279 CallArgs);
3280 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3281 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3282 CGF.FinishFunction();
3283 return TaskEntry;
3284 }
3286 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3287 SourceLocation Loc,
3288 QualType KmpInt32Ty,
3289 QualType KmpTaskTWithPrivatesPtrQTy,
3290 QualType KmpTaskTWithPrivatesQTy) {
3291 ASTContext &C = CGM.getContext();
3292 FunctionArgList Args;
3293 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3294 ImplicitParamDecl::Other);
3295 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3296 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3297 ImplicitParamDecl::Other);
3298 Args.push_back(&GtidArg);
3299 Args.push_back(&TaskTypeArg);
3300 const auto &DestructorFnInfo =
3301 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3302 llvm::FunctionType *DestructorFnTy =
3303 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3304 std::string Name =
3305 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3306 auto *DestructorFn =
3307 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3308 Name, &CGM.getModule());
3309 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3310 DestructorFnInfo);
3311 DestructorFn->setDoesNotRecurse();
3312 CodeGenFunction CGF(CGM);
3313 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3314 Args, Loc, Loc);
3316 LValue Base = CGF.EmitLoadOfPointerLValue(
3317 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3318 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3319 const auto *KmpTaskTWithPrivatesQTyRD =
3320 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3321 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3322 Base = CGF.EmitLValueForField(Base, *FI);
3323 for (const auto *Field :
3324 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3325 if (QualType::DestructionKind DtorKind =
3326 Field->getType().isDestructedType()) {
3327 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3328 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3329 }
3330 }
3331 CGF.FinishFunction();
3332 return DestructorFn;
3333 }
3335 /// Emit a privates mapping function for correct handling of private and
3336 /// firstprivate variables.
3337 /// \code
3338 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3339 /// **noalias priv1,..., <tyn> **noalias privn) {
3340 /// *priv1 = &.privates.priv1;
3341 /// ...;
3342 /// *privn = &.privates.privn;
3343 /// }
3344 /// \endcode
3345 static llvm::Value *
3346 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3347 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3348 ArrayRef<PrivateDataTy> Privates) {
3349 ASTContext &C = CGM.getContext();
3350 FunctionArgList Args;
3351 ImplicitParamDecl TaskPrivatesArg(
3352 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3353 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3354 ImplicitParamDecl::Other);
3355 Args.push_back(&TaskPrivatesArg);
3356 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3357 unsigned Counter = 1;
3358 for (const Expr *E : Data.PrivateVars) {
3359 Args.push_back(ImplicitParamDecl::Create(
3360 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3361 C.getPointerType(C.getPointerType(E->getType()))
3362 .withConst()
3363 .withRestrict(),
3364 ImplicitParamDecl::Other));
3365 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3366 PrivateVarsPos[VD] = Counter;
3367 ++Counter;
3368 }
3369 for (const Expr *E : Data.FirstprivateVars) {
3370 Args.push_back(ImplicitParamDecl::Create(
3371 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3372 C.getPointerType(C.getPointerType(E->getType()))
3373 .withConst()
3374 .withRestrict(),
3375 ImplicitParamDecl::Other));
3376 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3377 PrivateVarsPos[VD] = Counter;
3378 ++Counter;
3379 }
3380 for (const Expr *E : Data.LastprivateVars) {
3381 Args.push_back(ImplicitParamDecl::Create(
3382 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3383 C.getPointerType(C.getPointerType(E->getType()))
3384 .withConst()
3385 .withRestrict(),
3386 ImplicitParamDecl::Other));
3387 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3388 PrivateVarsPos[VD] = Counter;
3389 ++Counter;
3390 }
3391 for (const VarDecl *VD : Data.PrivateLocals) {
3392 QualType Ty = VD->getType().getNonReferenceType();
3393 if (VD->getType()->isLValueReferenceType())
3394 Ty = C.getPointerType(Ty);
3395 if (isAllocatableDecl(VD))
3396 Ty = C.getPointerType(Ty);
3397 Args.push_back(ImplicitParamDecl::Create(
3398 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3399 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3400 ImplicitParamDecl::Other));
3401 PrivateVarsPos[VD] = Counter;
3402 ++Counter;
3403 }
3404 const auto &TaskPrivatesMapFnInfo =
3405 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3406 llvm::FunctionType *TaskPrivatesMapTy =
3407 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3408 std::string Name =
3409 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3410 auto *TaskPrivatesMap = llvm::Function::Create(
3411 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3412 &CGM.getModule());
3413 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3414 TaskPrivatesMapFnInfo);
3415 if (CGM.getLangOpts().Optimize) {
3416 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3417 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3418 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3419 }
3420 CodeGenFunction CGF(CGM);
3421 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3422 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3424 // *privi = &.privates.privi;
3425 LValue Base = CGF.EmitLoadOfPointerLValue(
3426 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3427 TaskPrivatesArg.getType()->castAs<PointerType>());
3428 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3429 Counter = 0;
3430 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3431 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3432 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3433 LValue RefLVal =
3434 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3435 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3436 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3437 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3438 ++Counter;
3439 }
3440 CGF.FinishFunction();
3441 return TaskPrivatesMap;
3442 }
3444 /// Emit initialization for private variables in task-based directives.
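/// For a trivially copyable 'firstprivate(x)' this amounts to, roughly (a
/// sketch): copy the captured value of 'x' out of the shareds block into
/// the task's privates block; nontrivial types run their copy constructor
/// instead, element by element for arrays.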
3445 static void emitPrivatesInit(CodeGenFunction &CGF,
3446 const OMPExecutableDirective &D,
3447 Address KmpTaskSharedsPtr, LValue TDBase,
3448 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3449 QualType SharedsTy, QualType SharedsPtrTy,
3450 const OMPTaskDataTy &Data,
3451 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3452 ASTContext &C = CGF.getContext();
3453 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3454 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3455 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3456 ? OMPD_taskloop
3457 : OMPD_task;
3458 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3459 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3460 LValue SrcBase;
3461 bool IsTargetTask =
3462 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3463 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3464 // For target-based directives, skip the 4 firstprivate arrays BasePointersArray,
3465 // PointersArray, SizesArray, and MappersArray. The original variables for
3466 // these arrays are not captured and we get their addresses explicitly.
3467 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3468 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3469 SrcBase = CGF.MakeAddrLValue(
3470 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3471 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3472 CGF.ConvertTypeForMem(SharedsTy)),
3473 SharedsTy);
3474 }
3475 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3476 for (const PrivateDataTy &Pair : Privates) {
3477 // Do not initialize private locals.
3478 if (Pair.second.isLocalPrivate()) {
3479 ++FI;
3480 continue;
3481 }
3482 const VarDecl *VD = Pair.second.PrivateCopy;
3483 const Expr *Init = VD->getAnyInitializer();
3484 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3485 !CGF.isTrivialInitializer(Init)))) {
3486 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3487 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3488 const VarDecl *OriginalVD = Pair.second.Original;
3489 // Check if the variable is the target-based BasePointersArray,
3490 // PointersArray, SizesArray, or MappersArray.
3491 LValue SharedRefLValue;
3492 QualType Type = PrivateLValue.getType();
3493 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3494 if (IsTargetTask && !SharedField) {
3495 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3496 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3497 cast<CapturedDecl>(OriginalVD->getDeclContext())
3498 ->getNumParams() == 0 &&
3499 isa<TranslationUnitDecl>(
3500 cast<CapturedDecl>(OriginalVD->getDeclContext())
3501 ->getDeclContext()) &&
3502 "Expected artificial target data variable.");
3503 SharedRefLValue =
3504 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3505 } else if (ForDup) {
3506 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3507 SharedRefLValue = CGF.MakeAddrLValue(
3508 SharedRefLValue.getAddress(CGF).withAlignment(
3509 C.getDeclAlign(OriginalVD)),
3510 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3511 SharedRefLValue.getTBAAInfo());
3512 } else if (CGF.LambdaCaptureFields.count(
3513 Pair.second.Original->getCanonicalDecl()) > 0 ||
3514 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3515 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3516 } else {
3517 // Processing for implicitly captured variables.
3518 InlinedOpenMPRegionRAII Region(
3519 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3520 /*HasCancel=*/false, /*NoInheritance=*/true);
3521 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3523 if (Type->isArrayType()) {
3524 // Initialize firstprivate array.
3525 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3526 // Perform simple memcpy.
3527 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3528 } else {
3529 // Initialize firstprivate array using element-by-element
3530 // initialization.
3531 CGF.EmitOMPAggregateAssign(
3532 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3533 Type,
3534 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3535 Address SrcElement) {
3536 // Clean up any temporaries needed by the initialization.
3537 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3538 InitScope.addPrivate(Elem, SrcElement);
3539 (void)InitScope.Privatize();
3540 // Emit initialization for single element.
3541 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3542 CGF, &CapturesInfo);
3543 CGF.EmitAnyExprToMem(Init, DestElement,
3544 Init->getType().getQualifiers(),
3545 /*IsInitializer=*/false);
3546 });
3547 }
3548 } else {
3549 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3550 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3551 (void)InitScope.Privatize();
3552 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3553 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3554 /*capturedByInit=*/false);
3555 }
3556 } else {
3557 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3558 }
3559 }
3560 ++FI;
3561 }
3562 }
3564 /// Check if nontrivial initialization is required, i.e. whether a duplication function is needed for taskloops.
3565 static bool checkInitIsRequired(CodeGenFunction &CGF,
3566 ArrayRef<PrivateDataTy> Privates) {
3567 bool InitRequired = false;
3568 for (const PrivateDataTy &Pair : Privates) {
3569 if (Pair.second.isLocalPrivate())
3570 continue;
3571 const VarDecl *VD = Pair.second.PrivateCopy;
3572 const Expr *Init = VD->getAnyInitializer();
3573 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3574 !CGF.isTrivialInitializer(Init));
3575 if (InitRequired)
3576 break;
3577 }
3578 return InitRequired;
3579 }
3582 /// Emit task_dup function (for initialization of
3583 /// private/firstprivate/lastprivate vars and last_iter flag)
3584 /// \code
3585 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3586 /// lastpriv) {
3587 /// // setup lastprivate flag
3588 /// task_dst->last = lastpriv;
3589 /// // could be constructor calls here...
3590 /// }
3591 /// \endcode
3592 static llvm::Value *
3593 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3594 const OMPExecutableDirective &D,
3595 QualType KmpTaskTWithPrivatesPtrQTy,
3596 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3597 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3598 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3599 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3600 ASTContext &C = CGM.getContext();
3601 FunctionArgList Args;
3602 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3603 KmpTaskTWithPrivatesPtrQTy,
3604 ImplicitParamDecl::Other);
3605 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3606 KmpTaskTWithPrivatesPtrQTy,
3607 ImplicitParamDecl::Other);
3608 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3609 ImplicitParamDecl::Other);
3610 Args.push_back(&DstArg);
3611 Args.push_back(&SrcArg);
3612 Args.push_back(&LastprivArg);
3613 const auto &TaskDupFnInfo =
3614 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3615 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3616 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3617 auto *TaskDup = llvm::Function::Create(
3618 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3619 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3620 TaskDup->setDoesNotRecurse();
3621 CodeGenFunction CGF(CGM);
3622 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3623 Loc);
3625 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3626 CGF.GetAddrOfLocalVar(&DstArg),
3627 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3628 // task_dst->liter = lastpriv;
3629 if (WithLastIter) {
3630 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3631 LValue Base = CGF.EmitLValueForField(
3632 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3633 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3634 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3635 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3636 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3637 }
3639 // Emit initial values for private copies (if any).
3640 assert(!Privates.empty());
3641 Address KmpTaskSharedsPtr = Address::invalid();
3642 if (!Data.FirstprivateVars.empty()) {
3643 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3644 CGF.GetAddrOfLocalVar(&SrcArg),
3645 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3646 LValue Base = CGF.EmitLValueForField(
3647 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3648 KmpTaskSharedsPtr = Address(
3649 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3650 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3651 KmpTaskTShareds)),
3652 Loc),
3653 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3654 }
3655 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3656 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3657 CGF.FinishFunction();
3658 return TaskDup;
3659 }
3661 /// Checks if a destructor function is required to be generated.
3662 /// \return true if cleanups are required, false otherwise.
3663 static bool
3664 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3665 ArrayRef<PrivateDataTy> Privates) {
3666 for (const PrivateDataTy &P : Privates) {
3667 if (P.second.isLocalPrivate())
3668 continue;
3669 QualType Ty = P.second.Original->getType().getNonReferenceType();
3670 if (Ty.isDestructedType())
3671 return true;
3672 }
3673 return false;
3674 }
3676 namespace {
3677 /// Loop generator for OpenMP iterator expression.
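/// For a single 'iterator(i = begin:end:step)' the emitted control flow is
/// roughly (a sketch, not verbatim IR):
/// \code
///   counter = 0;
/// cont:
///   if (counter < number-of-iterations) goto body; else goto exit;
/// body:
///   i = begin + counter * step;
///   <uses of i>
///   counter = counter + 1;
///   goto cont;
/// exit:
/// \endcode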
3678 class OMPIteratorGeneratorScope final
3679 : public CodeGenFunction::OMPPrivateScope {
3680 CodeGenFunction &CGF;
3681 const OMPIteratorExpr *E = nullptr;
3682 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3683 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3684 OMPIteratorGeneratorScope() = delete;
3685 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3687 public:
3688 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3689 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3690 if (!E)
3691 return;
3692 SmallVector<llvm::Value *, 4> Uppers;
3693 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3694 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3695 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3696 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3697 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3698 addPrivate(
3699 HelperData.CounterVD,
3700 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3701 }
3702 Privatize();
3704 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3705 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3706 LValue CLVal =
3707 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3708 HelperData.CounterVD->getType());
3709 // Counter = 0;
3710 CGF.EmitStoreOfScalar(
3711 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3712 CLVal);
3713 CodeGenFunction::JumpDest &ContDest =
3714 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3715 CodeGenFunction::JumpDest &ExitDest =
3716 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3717 // N = <number-of-iterations>;
3718 llvm::Value *N = Uppers[I];
3719 // cont:
3720 // if (Counter < N) goto body; else goto exit;
3721 CGF.EmitBlock(ContDest.getBlock());
3722 auto *CVal =
3723 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3724 llvm::Value *Cmp =
3725 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3726 ? CGF.Builder.CreateICmpSLT(CVal, N)
3727 : CGF.Builder.CreateICmpULT(CVal, N);
3728 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3729 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3730 // body:
3731 CGF.EmitBlock(BodyBB);
3732 // Iteri = Begini + Counter * Stepi;
3733 CGF.EmitIgnoredExpr(HelperData.Update);
3734 }
3735 }
3736 ~OMPIteratorGeneratorScope() {
3737 if (!E)
3738 return;
3739 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3740 // Counter = Counter + 1;
3741 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3742 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3743 // goto cont;
3744 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3745 // exit:
3746 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3747 }
3748 }
3749 };
3750 } // namespace
3752 static std::pair<llvm::Value *, llvm::Value *>
3753 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3754 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3755 llvm::Value *Addr;
3756 if (OASE) {
3757 const Expr *Base = OASE->getBase();
3758 Addr = CGF.EmitScalarExpr(Base);
3759 } else {
3760 Addr = CGF.EmitLValue(E).getPointer(CGF);
3761 }
3762 llvm::Value *SizeVal;
3763 QualType Ty = E->getType();
3764 if (OASE) {
3765 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3766 for (const Expr *SE : OASE->getDimensions()) {
3767 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3768 Sz = CGF.EmitScalarConversion(
3769 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3770 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3771 }
3772 } else if (const auto *ASE =
3773 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3774 LValue UpAddrLVal =
3775 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3776 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3777 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3778 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3779 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3780 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3781 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3782 } else {
3783 SizeVal = CGF.getTypeSize(Ty);
3785 return std::make_pair(Addr, SizeVal);
3786 }
3788 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
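/// The record built below roughly mirrors the runtime's type (a sketch;
/// kmp.h models the flags as a bit-field, which is represented here as a
/// plain 32-bit integer):
/// \code
///   struct kmp_task_affinity_info_t {
///     intptr_t base_addr;
///     size_t len;
///     kmp_uint32 flags;
///   };
/// \endcode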
3789 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3790 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3791 if (KmpTaskAffinityInfoTy.isNull()) {
3792 RecordDecl *KmpAffinityInfoRD =
3793 C.buildImplicitRecord("kmp_task_affinity_info_t");
3794 KmpAffinityInfoRD->startDefinition();
3795 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3796 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3797 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3798 KmpAffinityInfoRD->completeDefinition();
3799 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3800 }
3801 }
3803 CGOpenMPRuntime::TaskResultTy
3804 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3805 const OMPExecutableDirective &D,
3806 llvm::Function *TaskFunction, QualType SharedsTy,
3807 Address Shareds, const OMPTaskDataTy &Data) {
3808 ASTContext &C = CGM.getContext();
3809 llvm::SmallVector<PrivateDataTy, 4> Privates;
3810 // Aggregate privates and sort them by decreasing alignment.
3811 const auto *I = Data.PrivateCopies.begin();
3812 for (const Expr *E : Data.PrivateVars) {
3813 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3814 Privates.emplace_back(
3815 C.getDeclAlign(VD),
3816 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3817 /*PrivateElemInit=*/nullptr));
3818 ++I;
3820 I = Data.FirstprivateCopies.begin();
3821 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3822 for (const Expr *E : Data.FirstprivateVars) {
3823 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3824 Privates.emplace_back(
3825 C.getDeclAlign(VD),
3826 PrivateHelpersTy(
3827 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3828 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3829 ++I;
3830 ++IElemInitRef;
3832 I = Data.LastprivateCopies.begin();
3833 for (const Expr *E : Data.LastprivateVars) {
3834 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3835 Privates.emplace_back(
3836 C.getDeclAlign(VD),
3837 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3838 /*PrivateElemInit=*/nullptr));
3839 ++I;
3841 for (const VarDecl *VD : Data.PrivateLocals) {
3842 if (isAllocatableDecl(VD))
3843 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3844 else
3845 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3847 llvm::stable_sort(Privates,
3848 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3849 return L.first > R.first;
3850 });
3851 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3852 // Build type kmp_routine_entry_t (if not built yet).
3853 emitKmpRoutineEntryT(KmpInt32Ty);
3854 // Build type kmp_task_t (if not built yet).
3855 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3856 if (SavedKmpTaskloopTQTy.isNull()) {
3857 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3858 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3859 }
3860 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3861 } else {
3862 assert((D.getDirectiveKind() == OMPD_task ||
3863 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3864 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3865 "Expected taskloop, task or target directive");
3866 if (SavedKmpTaskTQTy.isNull()) {
3867 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3868 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3869 }
3870 KmpTaskTQTy = SavedKmpTaskTQTy;
3871 }
3872 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3873 // Build particular struct kmp_task_t for the given task.
3874 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3875 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3876 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3877 QualType KmpTaskTWithPrivatesPtrQTy =
3878 C.getPointerType(KmpTaskTWithPrivatesQTy);
3879 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3880 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3881 KmpTaskTWithPrivatesTy->getPointerTo();
3882 llvm::Value *KmpTaskTWithPrivatesTySize =
3883 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3884 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3886 // Emit initial values for private copies (if any).
3887 llvm::Value *TaskPrivatesMap = nullptr;
3888 llvm::Type *TaskPrivatesMapTy =
3889 std::next(TaskFunction->arg_begin(), 3)->getType();
3890 if (!Privates.empty()) {
3891 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3892 TaskPrivatesMap =
3893 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3894 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3895 TaskPrivatesMap, TaskPrivatesMapTy);
3896 } else {
3897 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3898 cast<llvm::PointerType>(TaskPrivatesMapTy));
3899 }
3900 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3901 // kmp_task_t *tt);
3902 llvm::Function *TaskEntry = emitProxyTaskFunction(
3903 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3904 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3905 TaskPrivatesMap);
3907 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3908 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3909 // kmp_routine_entry_t *task_entry);
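// As an illustration (a sketch only): for a plain tied task this becomes
// roughly
//   new_task = __kmpc_omp_task_alloc(&loc, gtid, /*flags=*/1,
//       sizeof(kmp_task_t_with_privates), sizeof(shareds),
//       &.omp_task_entry.);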
3910 // Task flags. Format is taken from
3911 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3912 // description of kmp_tasking_flags struct.
3913 enum {
3914 TiedFlag = 0x1,
3915 FinalFlag = 0x2,
3916 DestructorsFlag = 0x8,
3917 PriorityFlag = 0x20,
3918 DetachableFlag = 0x40,
3919 };
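// For example, a tied task with a priority clause and no destructors is
// allocated with flags = TiedFlag | PriorityFlag = 0x21 (a worked example,
// not an exhaustive list of the runtime's flag bits).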
3920 unsigned Flags = Data.Tied ? TiedFlag : 0;
3921 bool NeedsCleanup = false;
3922 if (!Privates.empty()) {
3923 NeedsCleanup =
3924 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3925 if (NeedsCleanup)
3926 Flags = Flags | DestructorsFlag;
3927 }
3928 if (Data.Priority.getInt())
3929 Flags = Flags | PriorityFlag;
3930 if (D.hasClausesOfKind<OMPDetachClause>())
3931 Flags = Flags | DetachableFlag;
3932 llvm::Value *TaskFlags =
3933 Data.Final.getPointer()
3934 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3935 CGF.Builder.getInt32(FinalFlag),
3936 CGF.Builder.getInt32(/*C=*/0))
3937 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3938 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3939 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3940 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3941 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3942 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3943 TaskEntry, KmpRoutineEntryPtrTy)};
3944 llvm::Value *NewTask;
3945 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3946 // Check if we have any device clause associated with the directive.
3947 const Expr *Device = nullptr;
3948 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3949 Device = C->getDevice();
3950 // Emit the device ID if any, otherwise use the default value.
3951 llvm::Value *DeviceID;
3952 if (Device)
3953 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3954 CGF.Int64Ty, /*isSigned=*/true);
3955 else
3956 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3957 AllocArgs.push_back(DeviceID);
3958 NewTask = CGF.EmitRuntimeCall(
3959 OMPBuilder.getOrCreateRuntimeFunction(
3960 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3961 AllocArgs);
3962 } else {
3963 NewTask =
3964 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3965 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3966 AllocArgs);
3967 }
3968 // Emit detach clause initialization.
3969 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3970 // task_descriptor);
3971 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3972 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3973 LValue EvtLVal = CGF.EmitLValue(Evt);
3975 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3976 // int gtid, kmp_task_t *task);
3977 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3978 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3979 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3980 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3981 OMPBuilder.getOrCreateRuntimeFunction(
3982 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3983 {Loc, Tid, NewTask});
3984 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3985 Evt->getExprLoc());
3986 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3987 }
3988 // Process affinity clauses.
3989 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3990 // Process list of affinity data.
3991 ASTContext &C = CGM.getContext();
3992 Address AffinitiesArray = Address::invalid();
3993 // Calculate number of elements to form the array of affinity data.
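// For example, 'affinity(iterator(i = 0:n) : p[i])' contributes n elements
// computed at run time, while 'affinity(a, b)' contributes two statically
// counted elements (an illustrative example, not from the source).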
3994 llvm::Value *NumOfElements = nullptr;
3995 unsigned NumAffinities = 0;
3996 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3997 if (const Expr *Modifier = C->getModifier()) {
3998 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3999 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4000 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4001 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4002 NumOfElements =
4003 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4004 }
4005 } else {
4006 NumAffinities += C->varlist_size();
4007 }
4008 }
4009 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4010 // Field ids in the kmp_task_affinity_info record.
4011 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4013 QualType KmpTaskAffinityInfoArrayTy;
4014 if (NumOfElements) {
4015 NumOfElements = CGF.Builder.CreateNUWAdd(
4016 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4017 auto *OVE = new (C) OpaqueValueExpr(
4018 Loc,
4019 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4020 VK_PRValue);
4021 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4022 RValue::get(NumOfElements));
4023 KmpTaskAffinityInfoArrayTy =
4024 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4025 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4026 // Properly emit variable-sized array.
4027 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4028 ImplicitParamDecl::Other);
4029 CGF.EmitVarDecl(*PD);
4030 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4031 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4032 /*isSigned=*/false);
4033 } else {
4034 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4035 KmpTaskAffinityInfoTy,
4036 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4037 ArrayType::Normal, /*IndexTypeQuals=*/0);
4038 AffinitiesArray =
4039 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4040 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4041 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4042 /*isSigned=*/false);
4043 }
4045 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4046 // Fill the array with elements that have no iterator modifier.
4047 unsigned Pos = 0;
4048 bool HasIterator = false;
4049 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4050 if (C->getModifier()) {
4051 HasIterator = true;
4052 continue;
4053 }
4054 for (const Expr *E : C->varlists()) {
4055 llvm::Value *Addr;
4056 llvm::Value *Size;
4057 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4058 LValue Base =
4059 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4060 KmpTaskAffinityInfoTy);
4061 // affs[i].base_addr = &<Affinities[i].second>;
4062 LValue BaseAddrLVal = CGF.EmitLValueForField(
4063 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4064 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4065 BaseAddrLVal);
4066 // affs[i].len = sizeof(<Affinities[i].second>);
4067 LValue LenLVal = CGF.EmitLValueForField(
4068 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4069 CGF.EmitStoreOfScalar(Size, LenLVal);
4070 ++Pos;
4071 }
4072 }
4073 LValue PosLVal;
4074 if (HasIterator) {
4075 PosLVal = CGF.MakeAddrLValue(
4076 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4077 C.getSizeType());
4078 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4079 }
4080 // Process elements with iterators.
4081 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4082 const Expr *Modifier = C->getModifier();
4083 if (!Modifier)
4084 continue;
4085 OMPIteratorGeneratorScope IteratorScope(
4086 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4087 for (const Expr *E : C->varlists()) {
4088 llvm::Value *Addr;
4089 llvm::Value *Size;
4090 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4091 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4092 LValue Base = CGF.MakeAddrLValue(
4093 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4094 // affs[i].base_addr = &<Affinities[i].second>;
4095 LValue BaseAddrLVal = CGF.EmitLValueForField(
4096 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4097 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4098 BaseAddrLVal);
4099 // affs[i].len = sizeof(<Affinities[i].second>);
4100 LValue LenLVal = CGF.EmitLValueForField(
4101 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4102 CGF.EmitStoreOfScalar(Size, LenLVal);
4103 Idx = CGF.Builder.CreateNUWAdd(
4104 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4105 CGF.EmitStoreOfScalar(Idx, PosLVal);
4106 }
4107 }
4108 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4109 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4110 // naffins, kmp_task_affinity_info_t *affin_list);
4111 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4112 llvm::Value *GTid = getThreadID(CGF, Loc);
4113 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4114 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4115 // FIXME: Emit the call and ignore its result for now until the
4116 // runtime function is properly implemented.
4117 (void)CGF.EmitRuntimeCall(
4118 OMPBuilder.getOrCreateRuntimeFunction(
4119 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4120 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4121 }
4122 llvm::Value *NewTaskNewTaskTTy =
4123 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4124 NewTask, KmpTaskTWithPrivatesPtrTy);
4125 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4126 KmpTaskTWithPrivatesQTy);
4127 LValue TDBase =
4128 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4129 // Fill the data in the resulting kmp_task_t record.
4130 // Copy shareds if there are any.
4131 Address KmpTaskSharedsPtr = Address::invalid();
4132 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4133 KmpTaskSharedsPtr = Address(
4134 CGF.EmitLoadOfScalar(
4135 CGF.EmitLValueForField(
4136 TDBase,
4137 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4138 Loc),
4139 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4140 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4141 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4142 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4143 }
4144 // Emit initial values for private copies (if any).
4145 TaskResultTy Result;
4146 if (!Privates.empty()) {
4147 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4148 SharedsTy, SharedsPtrTy, Data, Privates,
4149 /*ForDup=*/false);
4150 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4151 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4152 Result.TaskDupFn = emitTaskDupFunction(
4153 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4154 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4155 /*WithLastIter=*/!Data.LastprivateVars.empty());
4156 }
4157 }
4158 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4159 enum { Priority = 0, Destructors = 1 };
4160 // Provide pointer to function with destructors for privates.
4161 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4162 const RecordDecl *KmpCmplrdataUD =
4163 (*FI)->getType()->getAsUnionType()->getDecl();
4164 if (NeedsCleanup) {
4165 llvm::Value *DestructorFn = emitDestructorsFunction(
4166 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4167 KmpTaskTWithPrivatesQTy);
4168 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4169 LValue DestructorsLV = CGF.EmitLValueForField(
4170 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4171 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4172 DestructorFn, KmpRoutineEntryPtrTy),
4173 DestructorsLV);
4174 }
4175 // Set priority.
4176 if (Data.Priority.getInt()) {
4177 LValue Data2LV = CGF.EmitLValueForField(
4178 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4179 LValue PriorityLV = CGF.EmitLValueForField(
4180 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4181 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4182 }
4183 Result.NewTask = NewTask;
4184 Result.TaskEntry = TaskEntry;
4185 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4186 Result.TDBase = TDBase;
4187 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4188 return Result;
4189 }
4191 /// Translates internal dependency kind into the runtime kind.
4192 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4193 RTLDependenceKindTy DepKind;
4194 switch (K) {
4195 case OMPC_DEPEND_in:
4196 DepKind = RTLDependenceKindTy::DepIn;
4197 break;
4198 // Out and InOut dependencies must use the same code.
4199 case OMPC_DEPEND_out:
4200 case OMPC_DEPEND_inout:
4201 DepKind = RTLDependenceKindTy::DepInOut;
4202 break;
4203 case OMPC_DEPEND_mutexinoutset:
4204 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4205 break;
4206 case OMPC_DEPEND_inoutset:
4207 DepKind = RTLDependenceKindTy::DepInOutSet;
4208 break;
4209 case OMPC_DEPEND_outallmemory:
4210 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4211 break;
4212 case OMPC_DEPEND_source:
4213 case OMPC_DEPEND_sink:
4214 case OMPC_DEPEND_depobj:
4215 case OMPC_DEPEND_inoutallmemory:
4216 case OMPC_DEPEND_unknown:
4217 llvm_unreachable("Unknown task dependence type");
4219 return DepKind;
4220 }
4222 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
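/// The record built below roughly mirrors the runtime's type (a sketch;
/// kmp.h models the flags as a bit-field of boolean width):
/// \code
///   struct kmp_depend_info {
///     intptr_t base_addr;
///     size_t len;
///     kmp_uint8 flags;
///   };
/// \endcode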
4223 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4224 QualType &FlagsTy) {
4225 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4226 if (KmpDependInfoTy.isNull()) {
4227 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4228 KmpDependInfoRD->startDefinition();
4229 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4230 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4231 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4232 KmpDependInfoRD->completeDefinition();
4233 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4234 }
4235 }
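// For reference (a sketch): the implicit record built above corresponds to
// the runtime's dependency descriptor, roughly
//
//   typedef struct kmp_depend_info {
//     intptr_t base_addr; // address of the dependency object
//     size_t len;         // size of the dependency object
//     unsigned flags;     // FlagsTy: unsigned integer as wide as 'bool'
//   } kmp_depend_info_t;
//
// (the field names here are illustrative; the record fields are unnamed).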
4237 std::pair<llvm::Value *, LValue>
4238 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4239 SourceLocation Loc) {
4240 ASTContext &C = CGM.getContext();
4241 QualType FlagsTy;
4242 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4243 RecordDecl *KmpDependInfoRD =
4244 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4245 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4246 LValue Base = CGF.EmitLoadOfPointerLValue(
4247 CGF.Builder.CreateElementBitCast(
4248 DepobjLVal.getAddress(CGF),
4249 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4250 KmpDependInfoPtrTy->castAs<PointerType>());
4251 Address DepObjAddr = CGF.Builder.CreateGEP(
4252 Base.getAddress(CGF),
4253 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4254 LValue NumDepsBase = CGF.MakeAddrLValue(
4255 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4256 // NumDeps = deps[-1].base_addr;
4257 LValue BaseAddrLVal = CGF.EmitLValueForField(
4258 NumDepsBase,
4259 *std::next(KmpDependInfoRD->field_begin(),
4260 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4261 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4262 return std::make_pair(NumDeps, Base);
4263 }
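// Layout note (a sketch): a depobj value points at the first dependency
// record, and the record stored immediately before it holds the element
// count in its base_addr field, so the code above reads, conceptually:
//
//   kmp_depend_info_t *Deps = ...;       // loaded from the depobj lvalue
//   size_t NumDeps = Deps[-1].base_addr; // the GEP with index -1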
4265 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4266 llvm::PointerUnion<unsigned *, LValue *> Pos,
4267 const OMPTaskDataTy::DependData &Data,
4268 Address DependenciesArray) {
4269 CodeGenModule &CGM = CGF.CGM;
4270 ASTContext &C = CGM.getContext();
4271 QualType FlagsTy;
4272 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4273 RecordDecl *KmpDependInfoRD =
4274 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4275 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4277 OMPIteratorGeneratorScope IteratorScope(
4278 CGF, cast_or_null<OMPIteratorExpr>(
4279 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4280 : nullptr));
4281 for (const Expr *E : Data.DepExprs) {
4282 llvm::Value *Addr;
4283 llvm::Value *Size;
4285 // The expression will be a nullptr in the 'omp_all_memory' case.
4286 if (E) {
4287 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4288 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4289 } else {
4290 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4291 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4292 }
4293 LValue Base;
4294 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4295 Base = CGF.MakeAddrLValue(
4296 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4297 } else {
4298 assert(E && "Expected a non-null expression");
4299 LValue &PosLVal = *Pos.get<LValue *>();
4300 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4301 Base = CGF.MakeAddrLValue(
4302 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4303 }
4304 // deps[i].base_addr = &<Dependencies[i].second>;
4305 LValue BaseAddrLVal = CGF.EmitLValueForField(
4306 Base,
4307 *std::next(KmpDependInfoRD->field_begin(),
4308 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4309 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4310 // deps[i].len = sizeof(<Dependencies[i].second>);
4311 LValue LenLVal = CGF.EmitLValueForField(
4312 Base, *std::next(KmpDependInfoRD->field_begin(),
4313 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4314 CGF.EmitStoreOfScalar(Size, LenLVal);
4315 // deps[i].flags = <Dependencies[i].first>;
4316 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4317 LValue FlagsLVal = CGF.EmitLValueForField(
4318 Base,
4319 *std::next(KmpDependInfoRD->field_begin(),
4320 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4321 CGF.EmitStoreOfScalar(
4322 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4323 FlagsLVal);
4324 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4325 ++(*P);
4326 } else {
4327 LValue &PosLVal = *Pos.get<LValue *>();
4328 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4329 Idx = CGF.Builder.CreateNUWAdd(Idx,
4330 llvm::ConstantInt::get(Idx->getType(), 1));
4331 CGF.EmitStoreOfScalar(Idx, PosLVal);
4332 }
4333 }
4334 }
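// For a single clause like 'depend(in : a)' (with 'a' a hypothetical
// variable), the loop above fills one record, conceptually:
//
//   deps[pos].base_addr = (intptr_t)&a;
//   deps[pos].len = sizeof(a);
//   deps[pos].flags = (unsigned)RTLDependenceKindTy::DepIn;
//   ++pos; // or the NUW-add on the loaded position for iterator clauses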
4336 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4337 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4338 const OMPTaskDataTy::DependData &Data) {
4339 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4340 "Expected depobj dependency kind.");
4341 SmallVector<llvm::Value *, 4> Sizes;
4342 SmallVector<LValue, 4> SizeLVals;
4343 ASTContext &C = CGF.getContext();
4344 {
4345 OMPIteratorGeneratorScope IteratorScope(
4346 CGF, cast_or_null<OMPIteratorExpr>(
4347 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4348 : nullptr));
4349 for (const Expr *E : Data.DepExprs) {
4350 llvm::Value *NumDeps;
4351 LValue Base;
4352 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4353 std::tie(NumDeps, Base) =
4354 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4355 LValue NumLVal = CGF.MakeAddrLValue(
4356 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4357 C.getUIntPtrType());
4358 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4359 NumLVal.getAddress(CGF));
4360 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4361 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4362 CGF.EmitStoreOfScalar(Add, NumLVal);
4363 SizeLVals.push_back(NumLVal);
4364 }
4365 }
4366 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4367 llvm::Value *Size =
4368 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4369 Sizes.push_back(Size);
4370 }
4371 return Sizes;
4372 }
4374 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4375 QualType &KmpDependInfoTy,
4376 LValue PosLVal,
4377 const OMPTaskDataTy::DependData &Data,
4378 Address DependenciesArray) {
4379 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4380 "Expected depobj dependency kind.");
4381 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4382 {
4383 OMPIteratorGeneratorScope IteratorScope(
4384 CGF, cast_or_null<OMPIteratorExpr>(
4385 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4386 : nullptr));
4387 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4388 const Expr *E = Data.DepExprs[I];
4389 llvm::Value *NumDeps;
4390 LValue Base;
4391 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4392 std::tie(NumDeps, Base) =
4393 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4395 // Memcpy the dependency data.
4396 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4397 ElSize,
4398 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4399 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4400 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4401 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4403 // Increase pos.
4404 // pos += size;
4405 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4406 CGF.EmitStoreOfScalar(Add, PosLVal);
4407 }
4408 }
4409 }
4411 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4412 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4413 SourceLocation Loc) {
4414 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4415 return D.DepExprs.empty();
4416 }))
4417 return std::make_pair(nullptr, Address::invalid());
4418 // Process list of dependencies.
4419 ASTContext &C = CGM.getContext();
4420 Address DependenciesArray = Address::invalid();
4421 llvm::Value *NumOfElements = nullptr;
4422 unsigned NumDependencies = std::accumulate(
4423 Dependencies.begin(), Dependencies.end(), 0,
4424 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4425 return D.DepKind == OMPC_DEPEND_depobj
4426 ? V
4427 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4428 });
4429 QualType FlagsTy;
4430 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4431 bool HasDepobjDeps = false;
4432 bool HasRegularWithIterators = false;
4433 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4434 llvm::Value *NumOfRegularWithIterators =
4435 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4436 // Calculate number of depobj dependencies and regular deps with the
4437 // iterators.
4438 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4439 if (D.DepKind == OMPC_DEPEND_depobj) {
4440 SmallVector<llvm::Value *, 4> Sizes =
4441 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4442 for (llvm::Value *Size : Sizes) {
4443 NumOfDepobjElements =
4444 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4445 }
4446 HasDepobjDeps = true;
4447 continue;
4448 }
4449 // Include number of iterations, if any.
4451 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4452 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4453 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4454 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4455 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4456 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4457 NumOfRegularWithIterators =
4458 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4459 }
4460 HasRegularWithIterators = true;
4461 continue;
4462 }
4463 }
4465 QualType KmpDependInfoArrayTy;
4466 if (HasDepobjDeps || HasRegularWithIterators) {
4467 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4468 /*isSigned=*/false);
4469 if (HasDepobjDeps) {
4470 NumOfElements =
4471 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4472 }
4473 if (HasRegularWithIterators) {
4474 NumOfElements =
4475 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4476 }
4477 auto *OVE = new (C) OpaqueValueExpr(
4478 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4479 VK_PRValue);
4480 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4481 RValue::get(NumOfElements));
4482 KmpDependInfoArrayTy =
4483 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4484 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4485 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4486 // Properly emit variable-sized array.
4487 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4488 ImplicitParamDecl::Other);
4489 CGF.EmitVarDecl(*PD);
4490 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4491 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4492 /*isSigned=*/false);
4493 } else {
4494 KmpDependInfoArrayTy = C.getConstantArrayType(
4495 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4496 ArrayType::Normal, /*IndexTypeQuals=*/0);
4497 DependenciesArray =
4498 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4499 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4500 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4501 /*isSigned=*/false);
4502 }
4503 unsigned Pos = 0;
4504 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4505 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4506 Dependencies[I].IteratorExpr)
4507 continue;
4508 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4509 DependenciesArray);
4510 }
4511 // Copy regular dependencies with iterators.
4512 LValue PosLVal = CGF.MakeAddrLValue(
4513 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4514 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4515 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4516 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4517 !Dependencies[I].IteratorExpr)
4518 continue;
4519 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4520 DependenciesArray);
4521 }
4522 // Copy final depobj arrays without iterators.
4523 if (HasDepobjDeps) {
4524 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4525 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4526 continue;
4527 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4528 DependenciesArray);
4529 }
4530 }
4531 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4532 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4533 return std::make_pair(NumOfElements, DependenciesArray);
4534 }
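// Sizing example (a sketch): for
//   #pragma omp task depend(out : b) depend(iterator(i = 0:n), in : a[i])
// the plain 'out' dependency is counted statically (NumDependencies = 1),
// while the iterator clause contributes roughly n entries known only at
// run time, so NumOfElements becomes the emitted sum 1 + n and the
// dependency array is a VLA rather than a constant array.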
4536 Address CGOpenMPRuntime::emitDepobjDependClause(
4537 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4538 SourceLocation Loc) {
4539 if (Dependencies.DepExprs.empty())
4540 return Address::invalid();
4541 // Process list of dependencies.
4542 ASTContext &C = CGM.getContext();
4543 Address DependenciesArray = Address::invalid();
4544 unsigned NumDependencies = Dependencies.DepExprs.size();
4545 QualType FlagsTy;
4546 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4547 RecordDecl *KmpDependInfoRD =
4548 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4550 llvm::Value *Size;
4551 // Define type kmp_depend_info[<Dependencies.size()>];
4552 // For depobj reserve one extra element to store the number of elements.
4553 // It is required to handle the depobj(x) update(in) construct.
4554 // kmp_depend_info[<Dependencies.size()>] deps;
4555 llvm::Value *NumDepsVal;
4556 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4557 if (const auto *IE =
4558 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4559 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4560 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4561 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4562 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4563 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4564 }
4565 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4566 NumDepsVal);
4567 CharUnits SizeInBytes =
4568 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4569 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4570 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4571 NumDepsVal =
4572 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4573 } else {
4574 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4575 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4576 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4577 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4578 Size = CGM.getSize(Sz.alignTo(Align));
4579 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4580 }
4581 // Need to allocate in dynamic memory.
4582 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4583 // Use default allocator.
4584 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4585 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4587 llvm::Value *Addr =
4588 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4589 CGM.getModule(), OMPRTL___kmpc_alloc),
4590 Args, ".dep.arr.addr");
4591 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4592 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4593 Addr, KmpDependInfoLlvmTy->getPointerTo());
4594 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4595 // Write number of elements in the first element of array for depobj.
4596 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4597 // deps[0].base_addr = NumDependencies;
4598 LValue BaseAddrLVal = CGF.EmitLValueForField(
4599 Base,
4600 *std::next(KmpDependInfoRD->field_begin(),
4601 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4602 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4603 llvm::PointerUnion<unsigned *, LValue *> Pos;
4604 unsigned Idx = 1;
4605 LValue PosLVal;
4606 if (Dependencies.IteratorExpr) {
4607 PosLVal = CGF.MakeAddrLValue(
4608 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4609 C.getSizeType());
4610 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4611 /*IsInit=*/true);
4612 Pos = &PosLVal;
4613 } else {
4614 Pos = &Idx;
4615 }
4616 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4617 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4618 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4619 CGF.Int8Ty);
4620 return DependenciesArray;
4621 }
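// Usage example (a sketch): for
//   omp_depend_t o;
//   #pragma omp depobj(o) depend(inout : x)
// the code above allocates NumDependencies + 1 records via __kmpc_alloc,
// stores the element count into record 0, fills the dependencies starting
// at record 1, and returns the address of record 1 as the depobj value.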
4623 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4624 SourceLocation Loc) {
4625 ASTContext &C = CGM.getContext();
4626 QualType FlagsTy;
4627 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4628 LValue Base = CGF.EmitLoadOfPointerLValue(
4629 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4630 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4631 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4632 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4633 CGF.ConvertTypeForMem(KmpDependInfoTy));
4634 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4635 Addr.getElementType(), Addr.getPointer(),
4636 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4637 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4638 CGF.VoidPtrTy);
4639 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4640 // Use default allocator.
4641 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4642 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4644 // __kmpc_free(gtid, addr, nullptr);
4645 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4646 CGM.getModule(), OMPRTL___kmpc_free),
4647 Args);
4648 }
4650 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4651 OpenMPDependClauseKind NewDepKind,
4652 SourceLocation Loc) {
4653 ASTContext &C = CGM.getContext();
4654 QualType FlagsTy;
4655 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4656 RecordDecl *KmpDependInfoRD =
4657 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4658 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4659 llvm::Value *NumDeps;
4660 LValue Base;
4661 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4663 Address Begin = Base.getAddress(CGF);
4664 // Cast from pointer to array type to pointer to single element.
4665 llvm::Value *End = CGF.Builder.CreateGEP(
4666 Begin.getElementType(), Begin.getPointer(), NumDeps);
4667 // The basic structure here is a while-do loop.
4668 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4669 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4670 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4671 CGF.EmitBlock(BodyBB);
4672 llvm::PHINode *ElementPHI =
4673 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4674 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4675 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4676 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4677 Base.getTBAAInfo());
4678 // deps[i].flags = NewDepKind;
4679 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4680 LValue FlagsLVal = CGF.EmitLValueForField(
4681 Base, *std::next(KmpDependInfoRD->field_begin(),
4682 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4683 CGF.EmitStoreOfScalar(
4684 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4685 FlagsLVal);
4687 // Shift the address forward by one element.
4688 Address ElementNext =
4689 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4690 ElementPHI->addIncoming(ElementNext.getPointer(),
4691 CGF.Builder.GetInsertBlock());
4692 llvm::Value *IsEmpty =
4693 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4694 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4695 // Done.
4696 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4697 }
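// The emitted loop is conceptually (a sketch), e.g. for
// '#pragma omp depobj(o) update(in)':
//
//   for (kmp_depend_info_t *P = Deps; P != Deps + NumDeps; ++P)
//     P->flags = (unsigned)RTLDependenceKindTy::DepIn;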
4699 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4700 const OMPExecutableDirective &D,
4701 llvm::Function *TaskFunction,
4702 QualType SharedsTy, Address Shareds,
4703 const Expr *IfCond,
4704 const OMPTaskDataTy &Data) {
4705 if (!CGF.HaveInsertPoint())
4706 return;
4708 TaskResultTy Result =
4709 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4710 llvm::Value *NewTask = Result.NewTask;
4711 llvm::Function *TaskEntry = Result.TaskEntry;
4712 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4713 LValue TDBase = Result.TDBase;
4714 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4715 // Process list of dependences.
4716 Address DependenciesArray = Address::invalid();
4717 llvm::Value *NumOfElements;
4718 std::tie(NumOfElements, DependenciesArray) =
4719 emitDependClause(CGF, Data.Dependences, Loc);
4721 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4722 // libcall.
4723 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4724 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4725 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
4726 // dependence list is not empty.
4727 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4728 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4729 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4730 llvm::Value *DepTaskArgs[7];
4731 if (!Data.Dependences.empty()) {
4732 DepTaskArgs[0] = UpLoc;
4733 DepTaskArgs[1] = ThreadID;
4734 DepTaskArgs[2] = NewTask;
4735 DepTaskArgs[3] = NumOfElements;
4736 DepTaskArgs[4] = DependenciesArray.getPointer();
4737 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4738 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4739 }
4740 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4741 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4742 if (!Data.Tied) {
4743 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4744 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4745 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4746 }
4747 if (!Data.Dependences.empty()) {
4748 CGF.EmitRuntimeCall(
4749 OMPBuilder.getOrCreateRuntimeFunction(
4750 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4751 DepTaskArgs);
4752 } else {
4753 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4754 CGM.getModule(), OMPRTL___kmpc_omp_task),
4755 TaskArgs);
4756 }
4757 // Check if the parent region is untied and build a return for the untied task.
4758 if (auto *Region =
4759 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4760 Region->emitUntiedSwitch(CGF);
4761 };
4763 llvm::Value *DepWaitTaskArgs[7];
4764 if (!Data.Dependences.empty()) {
4765 DepWaitTaskArgs[0] = UpLoc;
4766 DepWaitTaskArgs[1] = ThreadID;
4767 DepWaitTaskArgs[2] = NumOfElements;
4768 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4769 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4770 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4771 DepWaitTaskArgs[6] =
4772 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4773 }
4774 auto &M = CGM.getModule();
4775 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4776 TaskEntry, &Data, &DepWaitTaskArgs,
4777 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4778 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4779 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4780 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4781 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4782 // is specified.
4783 if (!Data.Dependences.empty())
4784 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4785 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4786 DepWaitTaskArgs);
4787 // Call proxy_task_entry(gtid, new_task);
4788 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4789 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4790 Action.Enter(CGF);
4791 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4792 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4793 OutlinedFnArgs);
4794 };
4796 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4797 // kmp_task_t *new_task);
4798 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4799 // kmp_task_t *new_task);
4800 RegionCodeGenTy RCG(CodeGen);
4801 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4802 M, OMPRTL___kmpc_omp_task_begin_if0),
4803 TaskArgs,
4804 OMPBuilder.getOrCreateRuntimeFunction(
4805 M, OMPRTL___kmpc_omp_task_complete_if0),
4806 TaskArgs);
4807 RCG.setAction(Action);
4808 RCG(CGF);
4809 };
4811 if (IfCond) {
4812 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4813 } else {
4814 RegionCodeGenTy ThenRCG(ThenCodeGen);
4815 ThenRCG(CGF);
4816 }
4817 }
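// Lowering summary (a sketch): for '#pragma omp task if(c) depend(in : x)'
// the then/else split above produces roughly
//
//   if (c) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps, 0, NULL);
//   } else { // undeferred: wait for the dependences, then run inline
//     __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, deps, 0, NULL, nowait);
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task);
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }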
4819 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4820 const OMPLoopDirective &D,
4821 llvm::Function *TaskFunction,
4822 QualType SharedsTy, Address Shareds,
4823 const Expr *IfCond,
4824 const OMPTaskDataTy &Data) {
4825 if (!CGF.HaveInsertPoint())
4826 return;
4827 TaskResultTy Result =
4828 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4829 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4830 // libcall.
4831 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4832 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4833 // sched, kmp_uint64 grainsize, void *task_dup);
4834 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4835 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4836 llvm::Value *IfVal;
4837 if (IfCond) {
4838 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4839 /*isSigned=*/true);
4840 } else {
4841 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4842 }
4844 LValue LBLVal = CGF.EmitLValueForField(
4845 Result.TDBase,
4846 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4847 const auto *LBVar =
4848 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4849 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4850 LBLVal.getQuals(),
4851 /*IsInitializer=*/true);
4852 LValue UBLVal = CGF.EmitLValueForField(
4853 Result.TDBase,
4854 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4855 const auto *UBVar =
4856 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4857 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4858 UBLVal.getQuals(),
4859 /*IsInitializer=*/true);
4860 LValue StLVal = CGF.EmitLValueForField(
4861 Result.TDBase,
4862 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4863 const auto *StVar =
4864 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4865 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4866 StLVal.getQuals(),
4867 /*IsInitializer=*/true);
4868 // Store reductions address.
4869 LValue RedLVal = CGF.EmitLValueForField(
4870 Result.TDBase,
4871 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4872 if (Data.Reductions) {
4873 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4874 } else {
4875 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4876 CGF.getContext().VoidPtrTy);
4877 }
4878 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4879 llvm::Value *TaskArgs[] = {
4880 UpLoc,
4881 ThreadID,
4882 Result.NewTask,
4883 IfVal,
4884 LBLVal.getPointer(CGF),
4885 UBLVal.getPointer(CGF),
4886 CGF.EmitLoadOfScalar(StLVal, Loc),
4887 llvm::ConstantInt::getSigned(
4888 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4889 llvm::ConstantInt::getSigned(
4890 CGF.IntTy, Data.Schedule.getPointer()
4891 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4892 : NoSchedule),
4893 Data.Schedule.getPointer()
4894 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4895 /*isSigned=*/false)
4896 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4897 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4898 Result.TaskDupFn, CGF.VoidPtrTy)
4899 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4900 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4901 CGM.getModule(), OMPRTL___kmpc_taskloop),
4902 TaskArgs);
4903 }
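// Schedule encoding used above (a sketch):
//
//   #pragma omp taskloop              // sched = 0 (NoSchedule), value 0
//   #pragma omp taskloop grainsize(g) // sched = 1 (Grainsize),  value g
//   #pragma omp taskloop num_tasks(t) // sched = 2 (NumTasks),   value t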
4905 /// Emit reduction operation for each element of array (required for
4906 /// array sections) LHS op = RHS.
4907 /// \param Type Type of array.
4908 /// \param LHSVar Variable on the left side of the reduction operation
4909 /// (references element of array in original variable).
4910 /// \param RHSVar Variable on the right side of the reduction operation
4911 /// (references element of array in original variable).
4912 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4913 /// RHSVar.
4914 static void EmitOMPAggregateReduction(
4915 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4916 const VarDecl *RHSVar,
4917 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4918 const Expr *, const Expr *)> &RedOpGen,
4919 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4920 const Expr *UpExpr = nullptr) {
4921 // Perform element-by-element initialization.
4922 QualType ElementTy;
4923 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4924 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4926 // Drill down to the base element type on both arrays.
4927 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4928 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4930 llvm::Value *RHSBegin = RHSAddr.getPointer();
4931 llvm::Value *LHSBegin = LHSAddr.getPointer();
4932 // Cast from pointer to array type to pointer to single element.
4933 llvm::Value *LHSEnd =
4934 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4935 // The basic structure here is a while-do loop.
4936 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4937 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4938 llvm::Value *IsEmpty =
4939 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4940 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4942 // Enter the loop body, making that address the current address.
4943 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4944 CGF.EmitBlock(BodyBB);
4946 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4948 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4949 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4950 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4951 Address RHSElementCurrent(
4952 RHSElementPHI, RHSAddr.getElementType(),
4953 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4955 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4956 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4957 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4958 Address LHSElementCurrent(
4959 LHSElementPHI, LHSAddr.getElementType(),
4960 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4962 // Emit copy.
4963 CodeGenFunction::OMPPrivateScope Scope(CGF);
4964 Scope.addPrivate(LHSVar, LHSElementCurrent);
4965 Scope.addPrivate(RHSVar, RHSElementCurrent);
4966 Scope.Privatize();
4967 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4968 Scope.ForceCleanup();
4970 // Shift the address forward by one element.
4971 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4972 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4973 "omp.arraycpy.dest.element");
4974 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4975 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4976 "omp.arraycpy.src.element");
4977 // Check whether we've reached the end.
4978 llvm::Value *Done =
4979 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4980 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4981 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4982 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4984 // Done.
4985 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4986 }
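// The pointer walk above is equivalent to the element loop (a sketch):
//
//   for (size_t I = 0; I != NumElements; ++I)
//     LHS[I] = RedOp(LHS[I], RHS[I]);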
4988 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4989 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4990 /// UDR combiner function.
4991 static void emitReductionCombiner(CodeGenFunction &CGF,
4992 const Expr *ReductionOp) {
4993 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4994 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4995 if (const auto *DRE =
4996 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4997 if (const auto *DRD =
4998 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4999 std::pair<llvm::Function *, llvm::Function *> Reduction =
5000 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5001 RValue Func = RValue::get(Reduction.first);
5002 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5003 CGF.EmitIgnoredExpr(ReductionOp);
5004 return;
5005 }
5006 CGF.EmitIgnoredExpr(ReductionOp);
5007 }
5009 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5010 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
5011 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5012 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5013 ASTContext &C = CGM.getContext();
5015 // void reduction_func(void *LHSArg, void *RHSArg);
5016 FunctionArgList Args;
5017 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5018 ImplicitParamDecl::Other);
5019 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5020 ImplicitParamDecl::Other);
5021 Args.push_back(&LHSArg);
5022 Args.push_back(&RHSArg);
5023 const auto &CGFI =
5024 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5025 std::string Name = getReductionFuncName(ReducerName);
5026 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5027 llvm::GlobalValue::InternalLinkage, Name,
5028 &CGM.getModule());
5029 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5030 Fn->setDoesNotRecurse();
5031 CodeGenFunction CGF(CGM);
5032 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5034 // Dst = (void*[n])(LHSArg);
5035 // Src = (void*[n])(RHSArg);
5036 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5037 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5038 ArgsElemType->getPointerTo()),
5039 ArgsElemType, CGF.getPointerAlign());
5040 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5041 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5042 ArgsElemType->getPointerTo()),
5043 ArgsElemType, CGF.getPointerAlign());
5045 // ...
5046 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5047 // ...
5048 CodeGenFunction::OMPPrivateScope Scope(CGF);
5049 const auto *IPriv = Privates.begin();
5050 unsigned Idx = 0;
5051 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5052 const auto *RHSVar =
5053 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5054 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5055 const auto *LHSVar =
5056 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5057 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5058 QualType PrivTy = (*IPriv)->getType();
5059 if (PrivTy->isVariablyModifiedType()) {
5060 // Get array size and emit VLA type.
5061 ++Idx;
5062 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5063 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5064 const VariableArrayType *VLA =
5065 CGF.getContext().getAsVariableArrayType(PrivTy);
5066 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5067 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5068 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5069 CGF.EmitVariablyModifiedType(PrivTy);
5070 }
5071 }
5072 Scope.Privatize();
5073 IPriv = Privates.begin();
5074 const auto *ILHS = LHSExprs.begin();
5075 const auto *IRHS = RHSExprs.begin();
5076 for (const Expr *E : ReductionOps) {
5077 if ((*IPriv)->getType()->isArrayType()) {
5078 // Emit reduction for array section.
5079 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5080 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5081 EmitOMPAggregateReduction(
5082 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5083 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5084 emitReductionCombiner(CGF, E);
5085 });
5086 } else {
5087 // Emit reduction for array subscript or single variable.
5088 emitReductionCombiner(CGF, E);
5089 }
5090 ++IPriv;
5091 ++ILHS;
5092 ++IRHS;
5093 }
5094 Scope.ForceCleanup();
5095 CGF.FinishFunction();
5096 return Fn;
5097 }
5099 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5100 const Expr *ReductionOp,
5101 const Expr *PrivateRef,
5102 const DeclRefExpr *LHS,
5103 const DeclRefExpr *RHS) {
5104 if (PrivateRef->getType()->isArrayType()) {
5105 // Emit reduction for array section.
5106 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5107 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5108 EmitOMPAggregateReduction(
5109 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5110 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5111 emitReductionCombiner(CGF, ReductionOp);
5112 });
5113 } else {
5114 // Emit reduction for array subscript or single variable.
5115 emitReductionCombiner(CGF, ReductionOp);
5116 }
5117 }
5119 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5120 ArrayRef<const Expr *> Privates,
5121 ArrayRef<const Expr *> LHSExprs,
5122 ArrayRef<const Expr *> RHSExprs,
5123 ArrayRef<const Expr *> ReductionOps,
5124 ReductionOptionsTy Options) {
5125 if (!CGF.HaveInsertPoint())
5126 return;
5128 bool WithNowait = Options.WithNowait;
5129 bool SimpleReduction = Options.SimpleReduction;
5131 // The following code should be emitted for reduction:
5133 // static kmp_critical_name lock = { 0 };
5135 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5136 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5137 // ...
5138 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5139 // *(Type<n>-1*)rhs[<n>-1]);
5140 // }
5142 // ...
5143 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5144 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5145 // RedList, reduce_func, &<lock>)) {
5146 // case 1:
5147 // ...
5148 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5149 // ...
5150 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5151 // break;
5152 // case 2:
5153 // ...
5154 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5155 // ...
5156 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5157 // break;
5158 // default:;
5159 // }
5161 // If SimpleReduction is true, only the following code is generated:
5162 // ...
5163 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5164 // ...
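// Concrete example (a sketch): for '#pragma omp parallel for
// reduction(+ : sum)', LHSExprs refers to the original 'sum', RHSExprs to
// its private copy (called 'sum.private' here only for illustration), and
// each ReductionOps entry is roughly 'sum = sum + sum.private'; case 1
// combines under the runtime lock while case 2 combines with atomics.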
5166 ASTContext &C = CGM.getContext();
5168 if (SimpleReduction) {
5169 CodeGenFunction::RunCleanupsScope Scope(CGF);
5170 const auto *IPriv = Privates.begin();
5171 const auto *ILHS = LHSExprs.begin();
5172 const auto *IRHS = RHSExprs.begin();
5173 for (const Expr *E : ReductionOps) {
5174 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5175 cast<DeclRefExpr>(*IRHS));
5176 ++IPriv;
5177 ++ILHS;
5178 ++IRHS;
5179 }
5180 return;
5181 }
5183 // 1. Build a list of reduction variables.
5184 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5185 auto Size = RHSExprs.size();
5186 for (const Expr *E : Privates) {
5187 if (E->getType()->isVariablyModifiedType())
5188 // Reserve a slot for the array size.
5189 ++Size;
5190 }
5191 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5192 QualType ReductionArrayTy =
5193 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5194 /*IndexTypeQuals=*/0);
5195 Address ReductionList =
5196 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5197 const auto *IPriv = Privates.begin();
5198 unsigned Idx = 0;
5199 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5200 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5201 CGF.Builder.CreateStore(
5202 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5203 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5204 Elem);
5205 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5206 // Store array size.
5207 ++Idx;
5208 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5209 llvm::Value *Size = CGF.Builder.CreateIntCast(
5210 CGF.getVLASize(
5211 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5212 .NumElts,
5213 CGF.SizeTy, /*isSigned=*/false);
5214 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5215 Elem);
5216 }
5217 }
5219 // 2. Emit reduce_func().
5220 llvm::Function *ReductionFn = emitReductionFunction(
5221 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5222 Privates, LHSExprs, RHSExprs, ReductionOps);
5224 // 3. Create static kmp_critical_name lock = { 0 };
5225 std::string Name = getName({"reduction"});
5226 llvm::Value *Lock = getCriticalRegionLock(Name);
5228 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5229 // RedList, reduce_func, &<lock>);
5230 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5231 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5232 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5233 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5234 ReductionList.getPointer(), CGF.VoidPtrTy);
5235 llvm::Value *Args[] = {
5236 IdentTLoc, // ident_t *<loc>
5237 ThreadId, // i32 <gtid>
5238 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5239 ReductionArrayTySize, // size_type sizeof(RedList)
5240 RL, // void *RedList
5241 ReductionFn, // void (*) (void *, void *) <reduce_func>
5242 Lock // kmp_critical_name *&<lock>
5243 };
5244 llvm::Value *Res = CGF.EmitRuntimeCall(
5245 OMPBuilder.getOrCreateRuntimeFunction(
5246 CGM.getModule(),
5247 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5248 Args);
5250 // 5. Build switch(res)
5251 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5252 llvm::SwitchInst *SwInst =
5253 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5255 // 6. Build case 1:
5256 // ...
5257 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5258 // ...
5259 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5260 // break;
5261 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5262 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5263 CGF.EmitBlock(Case1BB);
5265 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5266 llvm::Value *EndArgs[] = {
5267 IdentTLoc, // ident_t *<loc>
5268 ThreadId, // i32 <gtid>
5269 Lock // kmp_critical_name *&<lock>
5270 };
5271 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5272 CodeGenFunction &CGF, PrePostActionTy &Action) {
5273 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5274 const auto *IPriv = Privates.begin();
5275 const auto *ILHS = LHSExprs.begin();
5276 const auto *IRHS = RHSExprs.begin();
5277 for (const Expr *E : ReductionOps) {
5278 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5279 cast<DeclRefExpr>(*IRHS));
5280 ++IPriv;
5281 ++ILHS;
5282 ++IRHS;
5283 }
5284 };
5285 RegionCodeGenTy RCG(CodeGen);
5286 CommonActionTy Action(
5287 nullptr, std::nullopt,
5288 OMPBuilder.getOrCreateRuntimeFunction(
5289 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5290 : OMPRTL___kmpc_end_reduce),
5291 EndArgs);
5292 RCG.setAction(Action);
5293 RCG(CGF);
5295 CGF.EmitBranch(DefaultBB);
5297 // 7. Build case 2:
5298 // ...
5299 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5300 // ...
5301 // break;
5302 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5303 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5304 CGF.EmitBlock(Case2BB);
5306 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5307 CodeGenFunction &CGF, PrePostActionTy &Action) {
5308 const auto *ILHS = LHSExprs.begin();
5309 const auto *IRHS = RHSExprs.begin();
5310 const auto *IPriv = Privates.begin();
5311 for (const Expr *E : ReductionOps) {
5312 const Expr *XExpr = nullptr;
5313 const Expr *EExpr = nullptr;
5314 const Expr *UpExpr = nullptr;
5315 BinaryOperatorKind BO = BO_Comma;
5316 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5317 if (BO->getOpcode() == BO_Assign) {
5318 XExpr = BO->getLHS();
5319 UpExpr = BO->getRHS();
5320 }
5321 }
5322 // Try to emit update expression as a simple atomic.
5323 const Expr *RHSExpr = UpExpr;
5324 if (RHSExpr) {
5325 // Analyze RHS part of the whole expression.
5326 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5327 RHSExpr->IgnoreParenImpCasts())) {
5328 // If this is a conditional operator, analyze its condition for
5329 // min/max reduction operator.
5330 RHSExpr = ACO->getCond();
5331 }
5332 if (const auto *BORHS =
5333 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5334 EExpr = BORHS->getRHS();
5335 BO = BORHS->getOpcode();
5336 }
5337 }
5338 if (XExpr) {
5339 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5340 auto &&AtomicRedGen = [BO, VD,
5341 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5342 const Expr *EExpr, const Expr *UpExpr) {
5343 LValue X = CGF.EmitLValue(XExpr);
5344 RValue E;
5345 if (EExpr)
5346 E = CGF.EmitAnyExpr(EExpr);
5347 CGF.EmitOMPAtomicSimpleUpdateExpr(
5348 X, E, BO, /*IsXLHSInRHSPart=*/true,
5349 llvm::AtomicOrdering::Monotonic, Loc,
5350 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5351 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5352 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5353 CGF.emitOMPSimpleStore(
5354 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5355 VD->getType().getNonReferenceType(), Loc);
5356 PrivateScope.addPrivate(VD, LHSTemp);
5357 (void)PrivateScope.Privatize();
5358 return CGF.EmitAnyExpr(UpExpr);
5359 });
5360 };
5361 if ((*IPriv)->getType()->isArrayType()) {
5362 // Emit atomic reduction for array section.
5363 const auto *RHSVar =
5364 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5365 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5366 AtomicRedGen, XExpr, EExpr, UpExpr);
5367 } else {
5368 // Emit atomic reduction for array subscript or single variable.
5369 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5370 }
5371 } else {
5372 // Emit as a critical region.
5373 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5374 const Expr *, const Expr *) {
5375 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5376 std::string Name = RT.getName({"atomic_reduction"});
5377 RT.emitCriticalRegion(
5378 CGF, Name,
5379 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5380 Action.Enter(CGF);
5381 emitReductionCombiner(CGF, E);
5382 },
5383 Loc);
5384 };
5385 if ((*IPriv)->getType()->isArrayType()) {
5386 const auto *LHSVar =
5387 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5388 const auto *RHSVar =
5389 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5390 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5391 CritRedGen);
5392 } else {
5393 CritRedGen(CGF, nullptr, nullptr, nullptr);
5394 }
5395 }
5396 ++ILHS;
5397 ++IRHS;
5398 ++IPriv;
5399 }
5400 };
5401 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5402 if (!WithNowait) {
5403 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5404 llvm::Value *EndArgs[] = {
5405 IdentTLoc, // ident_t *<loc>
5406 ThreadId, // i32 <gtid>
5407 Lock // kmp_critical_name *&<lock>
5408 };
5409 CommonActionTy Action(nullptr, std::nullopt,
5410 OMPBuilder.getOrCreateRuntimeFunction(
5411 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5412 EndArgs);
5413 AtomicRCG.setAction(Action);
5414 AtomicRCG(CGF);
5415 } else {
5416 AtomicRCG(CGF);
5417 }
5419 CGF.EmitBranch(DefaultBB);
5420 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5421 }
5423 /// Generates unique name for artificial threadprivate variables.
5424 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5425 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5426 const Expr *Ref) {
5427 SmallString<256> Buffer;
5428 llvm::raw_svector_ostream Out(Buffer);
5429 const clang::DeclRefExpr *DE;
5430 const VarDecl *D = ::getBaseDecl(Ref, DE);
5431 if (!D)
5432 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5433 D = D->getCanonicalDecl();
5434 std::string Name = CGM.getOpenMPRuntime().getName(
5435 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5436 Out << Prefix << Name << "_"
5437 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5438 return std::string(Out.str());
5439 }
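// Hypothetical example: for a global 'int x' with Prefix "reduction_size",
// this produces a name of the form "reduction_size.x_<rawloc>", where
// <rawloc> stands for the raw encoding of the declaration's begin location.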
5441 /// Emits reduction initializer function:
5442 /// \code
5443 /// void @.red_init(void* %arg, void* %orig) {
5444 /// %0 = bitcast void* %arg to <type>*
5445 /// store <type> <init>, <type>* %0
5446 /// ret void
5447 /// }
5448 /// \endcode
5449 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5450 SourceLocation Loc,
5451 ReductionCodeGen &RCG, unsigned N) {
5452 ASTContext &C = CGM.getContext();
5453 QualType VoidPtrTy = C.VoidPtrTy;
5454 VoidPtrTy.addRestrict();
5455 FunctionArgList Args;
5456 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5457 ImplicitParamDecl::Other);
5458 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5459 ImplicitParamDecl::Other);
5460 Args.emplace_back(&Param);
5461 Args.emplace_back(&ParamOrig);
5462 const auto &FnInfo =
5463 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5464 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5465 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5466 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5467 Name, &CGM.getModule());
5468 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5469 Fn->setDoesNotRecurse();
5470 CodeGenFunction CGF(CGM);
5471 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5472 QualType PrivateType = RCG.getPrivateType(N);
5473 Address PrivateAddr = CGF.EmitLoadOfPointer(
5474 CGF.Builder.CreateElementBitCast(
5475 CGF.GetAddrOfLocalVar(&Param),
5476 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5477 C.getPointerType(PrivateType)->castAs<PointerType>());
5478 llvm::Value *Size = nullptr;
5479 // If the size of the reduction item is non-constant, load it from global
5480 // threadprivate variable.
5481 if (RCG.getSizes(N).second) {
5482 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5483 CGF, CGM.getContext().getSizeType(),
5484 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5485 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5486 CGM.getContext().getSizeType(), Loc);
5487 }
5488 RCG.emitAggregateType(CGF, N, Size);
5489 Address OrigAddr = Address::invalid();
5490 // If the initializer uses the initializer from the declare reduction
5491 // construct, emit a pointer to the address of the original reduction item
5492 // (required by the reduction initializer).
5493 if (RCG.usesReductionInitializer(N)) {
5494 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5495 OrigAddr = CGF.EmitLoadOfPointer(
5496 SharedAddr,
5497 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5498 }
5499 // Emit the initializer:
5500 // %0 = bitcast void* %arg to <type>*
5501 // store <type> <init>, <type>* %0
5502 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5503 [](CodeGenFunction &) { return false; });
5504 CGF.FinishFunction();
5505 return Fn;
5506 }
5508 /// Emits reduction combiner function:
5509 /// \code
5510 /// void @.red_comb(void* %arg0, void* %arg1) {
5511 /// %lhs = bitcast void* %arg0 to <type>*
5512 /// %rhs = bitcast void* %arg1 to <type>*
5513 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5514 /// store <type> %2, <type>* %lhs
5515 /// ret void
5516 /// }
5517 /// \endcode
5518 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5519 SourceLocation Loc,
5520 ReductionCodeGen &RCG, unsigned N,
5521 const Expr *ReductionOp,
5522 const Expr *LHS, const Expr *RHS,
5523 const Expr *PrivateRef) {
5524 ASTContext &C = CGM.getContext();
5525 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5526 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5527 FunctionArgList Args;
5528 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5529 C.VoidPtrTy, ImplicitParamDecl::Other);
5530 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5531 ImplicitParamDecl::Other);
5532 Args.emplace_back(&ParamInOut);
5533 Args.emplace_back(&ParamIn);
5534 const auto &FnInfo =
5535 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5536 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5537 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5538 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5539 Name, &CGM.getModule());
5540 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5541 Fn->setDoesNotRecurse();
5542 CodeGenFunction CGF(CGM);
5543 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5544 llvm::Value *Size = nullptr;
5545 // If the size of the reduction item is non-constant, load it from global
5546 // threadprivate variable.
5547 if (RCG.getSizes(N).second) {
5548 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5549 CGF, CGM.getContext().getSizeType(),
5550 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5551 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5552 CGM.getContext().getSizeType(), Loc);
5553 }
5554 RCG.emitAggregateType(CGF, N, Size);
5555 // Remap lhs and rhs variables to the addresses of the function arguments.
5556 // %lhs = bitcast void* %arg0 to <type>*
5557 // %rhs = bitcast void* %arg1 to <type>*
5558 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5559 PrivateScope.addPrivate(
5560 LHSVD,
5561 // Pull out the pointer to the variable.
5562 CGF.EmitLoadOfPointer(
5563 CGF.Builder.CreateElementBitCast(
5564 CGF.GetAddrOfLocalVar(&ParamInOut),
5565 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5566 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5567 PrivateScope.addPrivate(
5568 RHSVD,
5569 // Pull out the pointer to the variable.
5570 CGF.EmitLoadOfPointer(
5571 CGF.Builder.CreateElementBitCast(
5572 CGF.GetAddrOfLocalVar(&ParamIn),
5573 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5574 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5575 PrivateScope.Privatize();
5576 // Emit the combiner body:
5577 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5578 // store <type> %2, <type>* %lhs
5579 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5580 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5581 cast<DeclRefExpr>(RHS));
5582 CGF.FinishFunction();
5583 return Fn;
5584 }
5586 /// Emits reduction finalizer function:
5587 /// \code
5588 /// void @.red_fini(void* %arg) {
5589 /// %0 = bitcast void* %arg to <type>*
5590 /// <destroy>(<type>* %0)
5591 /// ret void
5592 /// }
5593 /// \endcode
5594 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5595 SourceLocation Loc,
5596 ReductionCodeGen &RCG, unsigned N) {
5597 if (!RCG.needCleanups(N))
5598 return nullptr;
5599 ASTContext &C = CGM.getContext();
5600 FunctionArgList Args;
5601 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5602 ImplicitParamDecl::Other);
5603 Args.emplace_back(&Param);
5604 const auto &FnInfo =
5605 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5606 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5607 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5608 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5609 Name, &CGM.getModule());
5610 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5611 Fn->setDoesNotRecurse();
5612 CodeGenFunction CGF(CGM);
5613 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5614 Address PrivateAddr = CGF.EmitLoadOfPointer(
5615 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5616 llvm::Value *Size = nullptr;
5617 // If the size of the reduction item is non-constant, load it from global
5618 // threadprivate variable.
5619 if (RCG.getSizes(N).second) {
5620 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5621 CGF, CGM.getContext().getSizeType(),
5622 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5623 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5624 CGM.getContext().getSizeType(), Loc);
5625 }
5626 RCG.emitAggregateType(CGF, N, Size);
5627 // Emit the finalizer body:
5628 // <destroy>(<type>* %0)
5629 RCG.emitCleanups(CGF, N, PrivateAddr);
5630 CGF.FinishFunction(Loc);
5631   return Fn;
5632 }
5634 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5635 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5636 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5637 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5638 return nullptr;
5640 // Build typedef struct:
5641 // kmp_taskred_input {
5642 // void *reduce_shar; // shared reduction item
5643 // void *reduce_orig; // original reduction item used for initialization
5644 // size_t reduce_size; // size of data item
5645 // void *reduce_init; // data initialization routine
5646 // void *reduce_fini; // data finalization routine
5647 // void *reduce_comb; // data combiner routine
5648 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5649 // } kmp_taskred_input_t;
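  // For illustration (hypothetical user code, not emitted): a construct such as
  //   #pragma omp taskgroup task_reduction(+ : sum)
  // reaches this function with one entry in Data.ReductionVars for 'sum', so a
  // one-element .rd_input. array is built below.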
5650 ASTContext &C = CGM.getContext();
5651 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5652 RD->startDefinition();
5653 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5654 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5655 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5656 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5657 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5658 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5659 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5660 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5661 RD->completeDefinition();
5662 QualType RDType = C.getRecordType(RD);
5663 unsigned Size = Data.ReductionVars.size();
5664 llvm::APInt ArraySize(/*numBits=*/64, Size);
5665 QualType ArrayRDType = C.getConstantArrayType(
5666 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5667   // kmp_taskred_input_t .rd_input.[Size];
5668 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5669 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5670 Data.ReductionCopies, Data.ReductionOps);
5671 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5672     // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5673 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5674 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5675 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5676 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5677 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5678 ".rd_input.gep.");
5679 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5680 // ElemLVal.reduce_shar = &Shareds[Cnt];
5681 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5682 RCG.emitSharedOrigLValue(CGF, Cnt);
5683 llvm::Value *CastedShared =
5684 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5685 CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5686 // ElemLVal.reduce_orig = &Origs[Cnt];
5687 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5688 llvm::Value *CastedOrig =
5689 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
5690 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
5691 RCG.emitAggregateType(CGF, Cnt);
5692 llvm::Value *SizeValInChars;
5693 llvm::Value *SizeVal;
5694 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5695     // We use delayed creation/initialization for VLAs and array sections. It is
5696     // required because the runtime does not provide a way to pass the sizes of
5697     // VLAs/array sections to the initializer/combiner/finalizer functions.
5698     // Instead, threadprivate global variables are used to store these values
5699     // and the functions read them from there.
5700 bool DelayedCreation = !!SizeVal;
5701 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5702 /*isSigned=*/false);
5703 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5704 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5705 // ElemLVal.reduce_init = init;
5706 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5707 llvm::Value *InitAddr =
5708 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5709 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5710 // ElemLVal.reduce_fini = fini;
5711 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5712 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5713 llvm::Value *FiniAddr = Fini
5714 ? CGF.EmitCastToVoidPtr(Fini)
5715 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5716 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5717 // ElemLVal.reduce_comb = comb;
5718 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5719 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
5720 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5721 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5722 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5723 // ElemLVal.flags = 0;
5724 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5725 if (DelayedCreation) {
5726 CGF.EmitStoreOfScalar(
5727 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5728 FlagsLVal);
5729 } else
5730 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5731                                  FlagsLVal.getType());
5732   }
5733 if (Data.IsReductionWithTaskMod) {
5734 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5735 // is_ws, int num, void *data);
5736 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5737 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5738 CGM.IntTy, /*isSigned=*/true);
5739 llvm::Value *Args[] = {
5740 IdentTLoc, GTid,
5741 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5742 /*isSigned=*/true),
5743 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5744 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5745 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5746 return CGF.EmitRuntimeCall(
5747 OMPBuilder.getOrCreateRuntimeFunction(
5748 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5749         Args);
5750   }
5751 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5752 llvm::Value *Args[] = {
5753 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5754 /*isSigned=*/true),
5755 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5756 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5757 CGM.VoidPtrTy)};
5758 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5759 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5760                              Args);
5761 }
5763 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5764 SourceLocation Loc,
5765 bool IsWorksharingReduction) {
5766   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
5767   // int is_ws);
5768 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5769 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5770 CGM.IntTy, /*isSigned=*/true);
5771 llvm::Value *Args[] = {IdentTLoc, GTid,
5772 llvm::ConstantInt::get(CGM.IntTy,
5773 IsWorksharingReduction ? 1 : 0,
5774 /*isSigned=*/true)};
5775 (void)CGF.EmitRuntimeCall(
5776 OMPBuilder.getOrCreateRuntimeFunction(
5777 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5778       Args);
5779 }
5781 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5782 SourceLocation Loc,
5783 ReductionCodeGen &RCG,
5784 unsigned N) {
5785 auto Sizes = RCG.getSizes(N);
5786   // Emit a threadprivate global variable if the size is non-constant
5787   // (Sizes.second != nullptr).
5788 if (Sizes.second) {
5789 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5790 /*isSigned=*/false);
5791 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5792 CGF, CGM.getContext().getSizeType(),
5793 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5794     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5795   }
5796 }
5798 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5799 SourceLocation Loc,
5800 llvm::Value *ReductionsPtr,
5801 LValue SharedLVal) {
5802 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5803 // *d);
5804 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5805 CGM.IntTy,
5806 /*isSigned=*/true),
5807 ReductionsPtr,
5808 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5809 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5810 return Address(
5811 CGF.EmitRuntimeCall(
5812 OMPBuilder.getOrCreateRuntimeFunction(
5813 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5814 Args),
5815       CGF.Int8Ty, SharedLVal.getAlignment());
5816 }
5818 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5819 const OMPTaskDataTy &Data) {
5820 if (!CGF.HaveInsertPoint())
5821 return;
5823 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5824 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5825 OMPBuilder.createTaskwait(CGF.Builder);
5826 } else {
5827 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5828 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5829 auto &M = CGM.getModule();
5830 Address DependenciesArray = Address::invalid();
5831 llvm::Value *NumOfElements;
5832 std::tie(NumOfElements, DependenciesArray) =
5833 emitDependClause(CGF, Data.Dependences, Loc);
5834 if (!Data.Dependences.empty()) {
5835 llvm::Value *DepWaitTaskArgs[7];
5836 DepWaitTaskArgs[0] = UpLoc;
5837 DepWaitTaskArgs[1] = ThreadID;
5838 DepWaitTaskArgs[2] = NumOfElements;
5839 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5840 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5841 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5842 DepWaitTaskArgs[6] =
5843 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5845 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5847 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5848 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5849 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5850       // kmp_int32 has_no_wait); emitted when dependence info is specified.
5851 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5852 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5853 DepWaitTaskArgs);
5855 } else {
5857 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5858 // global_tid);
5859 llvm::Value *Args[] = {UpLoc, ThreadID};
5860 // Ignore return result until untied tasks are supported.
5861 CGF.EmitRuntimeCall(
5862 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5863           Args);
5864     }
5865   }
5867 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5868     Region->emitUntiedSwitch(CGF);
5869 }
5871 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5872 OpenMPDirectiveKind InnerKind,
5873 const RegionCodeGenTy &CodeGen,
5874 bool HasCancel) {
5875 if (!CGF.HaveInsertPoint())
5876 return;
5877 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5878 InnerKind != OMPD_critical &&
5879 InnerKind != OMPD_master &&
5880 InnerKind != OMPD_masked);
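  // Emit the body of the construct in place; inlined regions reuse the
  // current function instead of outlining.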
5881   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5882 }
5884 namespace {
5885 enum RTCancelKind {
5886 CancelNoreq = 0,
5887 CancelParallel = 1,
5888 CancelLoop = 2,
5889 CancelSections = 3,
5890   CancelTaskgroup = 4
5891 };
5892 } // anonymous namespace
5894 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5895 RTCancelKind CancelKind = CancelNoreq;
5896 if (CancelRegion == OMPD_parallel)
5897 CancelKind = CancelParallel;
5898 else if (CancelRegion == OMPD_for)
5899 CancelKind = CancelLoop;
5900 else if (CancelRegion == OMPD_sections)
5901 CancelKind = CancelSections;
5902 else {
5903 assert(CancelRegion == OMPD_taskgroup);
5904     CancelKind = CancelTaskgroup;
5905   }
5906   return CancelKind;
5907 }
5909 void CGOpenMPRuntime::emitCancellationPointCall(
5910 CodeGenFunction &CGF, SourceLocation Loc,
5911 OpenMPDirectiveKind CancelRegion) {
5912 if (!CGF.HaveInsertPoint())
5913 return;
5914 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5915 // global_tid, kmp_int32 cncl_kind);
5916 if (auto *OMPRegionInfo =
5917 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5918 // For 'cancellation point taskgroup', the task region info may not have a
5919 // cancel. This may instead happen in another adjacent task.
5920 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5921 llvm::Value *Args[] = {
5922 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5923 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5924 // Ignore return result until untied tasks are supported.
5925 llvm::Value *Result = CGF.EmitRuntimeCall(
5926 OMPBuilder.getOrCreateRuntimeFunction(
5927 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5928 Args);
5929 // if (__kmpc_cancellationpoint()) {
5930 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5931 // exit from construct;
5932 // }
5933 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5934 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5935 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5936 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5937 CGF.EmitBlock(ExitBB);
5938 if (CancelRegion == OMPD_parallel)
5939 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5940 // exit from construct;
5941 CodeGenFunction::JumpDest CancelDest =
5942 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5943 CGF.EmitBranchThroughCleanup(CancelDest);
5944       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5945     }
5946   }
5947 }
5949 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5950 const Expr *IfCond,
5951 OpenMPDirectiveKind CancelRegion) {
5952 if (!CGF.HaveInsertPoint())
5953 return;
5954 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5955 // kmp_int32 cncl_kind);
5956 auto &M = CGM.getModule();
5957 if (auto *OMPRegionInfo =
5958 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5959 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5960 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5961 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5962 llvm::Value *Args[] = {
5963 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5964 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5965 // Ignore return result until untied tasks are supported.
5966 llvm::Value *Result = CGF.EmitRuntimeCall(
5967 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5968 // if (__kmpc_cancel()) {
5969 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5970 // exit from construct;
5971 // }
5972 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5973 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5974 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5975 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5976 CGF.EmitBlock(ExitBB);
5977 if (CancelRegion == OMPD_parallel)
5978 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5979 // exit from construct;
5980 CodeGenFunction::JumpDest CancelDest =
5981 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5982 CGF.EmitBranchThroughCleanup(CancelDest);
5983       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5984     };
5985 if (IfCond) {
5986 emitIfClause(CGF, IfCond, ThenGen,
5987 [](CodeGenFunction &, PrePostActionTy &) {});
5988 } else {
5989 RegionCodeGenTy ThenRCG(ThenGen);
5990       ThenRCG(CGF);
5991     }
5992   }
5993 }
5995 namespace {
5996 /// Cleanup action for uses_allocators support.
5997 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5998 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6000 public:
6001 OMPUsesAllocatorsActionTy(
6002 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6003 : Allocators(Allocators) {}
6004 void Enter(CodeGenFunction &CGF) override {
6005 if (!CGF.HaveInsertPoint())
6006 return;
6007 for (const auto &AllocatorData : Allocators) {
6008 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6009           CGF, AllocatorData.first, AllocatorData.second);
6010     }
6011   }
6012 void Exit(CodeGenFunction &CGF) override {
6013 if (!CGF.HaveInsertPoint())
6014 return;
6015 for (const auto &AllocatorData : Allocators) {
6016 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6017                                                         AllocatorData.first);
6018     }
6019   }
6020 };
6021 } // namespace
6023 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6024 const OMPExecutableDirective &D, StringRef ParentName,
6025 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6026 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6027 assert(!ParentName.empty() && "Invalid target entry parent name!");
6028 HasEmittedTargetRegion = true;
6029 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6030 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6031 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6032 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6033 if (!D.AllocatorTraits)
6034 continue;
6035       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6036     }
6037   }
6038 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6039 CodeGen.setAction(UsesAllocatorAction);
6040 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6041                                    IsOffloadEntry, CodeGen);
6042 }
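// For illustration (hypothetical names): a clause such as
//   uses_allocators(my_alloc(my_traits))
// on the target construct records the pair (my_alloc, my_traits) above, and
// the action set on CodeGen brackets the region with __kmpc_init_allocator /
// __kmpc_destroy_allocator calls emitted by the functions below.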
6044 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6045 const Expr *Allocator,
6046 const Expr *AllocatorTraits) {
6047 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6048 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6049 // Use default memspace handle.
6050 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6051 llvm::Value *NumTraits = llvm::ConstantInt::get(
6052 CGF.IntTy, cast<ConstantArrayType>(
6053 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6054 ->getSize()
6055 .getLimitedValue());
6056 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6057 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6058 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6059 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6060 AllocatorTraitsLVal.getBaseInfo(),
6061 AllocatorTraitsLVal.getTBAAInfo());
6062 llvm::Value *Traits = Addr.getPointer();
6064 llvm::Value *AllocatorVal =
6065 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6066 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6067 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6068 // Store to allocator.
6069 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
6070 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6071 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6072 AllocatorVal =
6073 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6074 Allocator->getType(), Allocator->getExprLoc());
6075   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6076 }
6078 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6079 const Expr *Allocator) {
6080 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6081 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6082 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6083 llvm::Value *AllocatorVal =
6084 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6085 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6086 CGF.getContext().VoidPtrTy,
6087 Allocator->getExprLoc());
6088 (void)CGF.EmitRuntimeCall(
6089 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6090 OMPRTL___kmpc_destroy_allocator),
6091       {ThreadId, AllocatorVal});
6092 }
6094 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6095 const OMPExecutableDirective &D, StringRef ParentName,
6096 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6097 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6099 llvm::TargetRegionEntryInfo EntryInfo =
6100 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6102 CodeGenFunction CGF(CGM, true);
6103 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6104 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6105 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6107 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6108 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6109         return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6110       };
6112 // Get NumTeams and ThreadLimit attributes
6113 int32_t DefaultValTeams = -1;
6114 int32_t DefaultValThreads = -1;
6115 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6116 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6118 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
6119 DefaultValTeams, DefaultValThreads,
6120 IsOffloadEntry, OutlinedFn, OutlinedFnID);
6122 if (OutlinedFn != nullptr)
6123     CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6124 }
6126 /// Checks if the expression is constant or does not have non-trivial function
6127 /// calls.
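/// For example, 'x + 1' is trivial (no calls, no side effects), while a call
/// to an arbitrary user-defined function is not.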
6128 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6129 // We can skip constant expressions.
6130 // We can skip expressions with trivial calls or simple expressions.
6131 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6132 !E->hasNonTrivialCall(Ctx)) &&
6133          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6134 }
6136 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6137 const Stmt *Body) {
6138 const Stmt *Child = Body->IgnoreContainers();
6139 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6140 Child = nullptr;
6141 for (const Stmt *S : C->body()) {
6142 if (const auto *E = dyn_cast<Expr>(S)) {
6143 if (isTrivial(Ctx, E))
6144           continue;
6145       }
6146 // Some of the statements can be ignored.
6147 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6148 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6149 continue;
6150 // Analyze declarations.
6151 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6152 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6153 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6154 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6155 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6156 isa<UsingDirectiveDecl>(D) ||
6157 isa<OMPDeclareReductionDecl>(D) ||
6158 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6159 return true;
6160 const auto *VD = dyn_cast<VarDecl>(D);
6161 if (!VD)
6162 return false;
6163               return VD->hasGlobalStorage() || !VD->isUsed();
6164             }))
6165           continue;
6166       }
6167       // Found multiple children - cannot return a single child.
6168 if (Child)
6169 return nullptr;
6170       Child = S;
6171     }
6172 if (Child)
6173       Child = Child->IgnoreContainers();
6174   }
6175   return Child;
6176 }
6178 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6179 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6180 int32_t &DefaultVal) {
6182 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6183 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6184 "Expected target-based executable directive.");
6185 switch (DirectiveKind) {
6186 case OMPD_target: {
6187 const auto *CS = D.getInnermostCapturedStmt();
6188 const auto *Body =
6189 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6190 const Stmt *ChildStmt =
6191 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6192 if (const auto *NestedDir =
6193 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6194 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6195 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6196 const Expr *NumTeams =
6197 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6198 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6199 if (auto Constant =
6200 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6201 DefaultVal = Constant->getExtValue();
6202         return NumTeams;
6203       }
6204 DefaultVal = 0;
6205         return nullptr;
6206       }
6207 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6208 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6209 DefaultVal = 1;
6210         return nullptr;
6211       }
6212 DefaultVal = 1;
6213       return nullptr;
6214     }
6215     // A value of -1 is used to signal that no teams region needs to be emitted.
6216 DefaultVal = -1;
6217     return nullptr;
6218   }
6219 case OMPD_target_teams:
6220 case OMPD_target_teams_distribute:
6221 case OMPD_target_teams_distribute_simd:
6222 case OMPD_target_teams_distribute_parallel_for:
6223 case OMPD_target_teams_distribute_parallel_for_simd: {
6224 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6225 const Expr *NumTeams =
6226 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6227 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6228 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6229 DefaultVal = Constant->getExtValue();
6230       return NumTeams;
6231     }
6232 DefaultVal = 0;
6233     return nullptr;
6234   }
6235 case OMPD_target_parallel:
6236 case OMPD_target_parallel_for:
6237 case OMPD_target_parallel_for_simd:
6238 case OMPD_target_simd:
6239 DefaultVal = 1;
6240 return nullptr;
6241 case OMPD_parallel:
6242 case OMPD_for:
6243 case OMPD_parallel_for:
6244 case OMPD_parallel_master:
6245 case OMPD_parallel_sections:
6246 case OMPD_for_simd:
6247 case OMPD_parallel_for_simd:
6248 case OMPD_cancel:
6249 case OMPD_cancellation_point:
6250 case OMPD_ordered:
6251 case OMPD_threadprivate:
6252 case OMPD_allocate:
6253 case OMPD_task:
6254 case OMPD_simd:
6255 case OMPD_tile:
6256 case OMPD_unroll:
6257 case OMPD_sections:
6258 case OMPD_section:
6259 case OMPD_single:
6260 case OMPD_master:
6261 case OMPD_critical:
6262 case OMPD_taskyield:
6263 case OMPD_barrier:
6264 case OMPD_taskwait:
6265 case OMPD_taskgroup:
6266 case OMPD_atomic:
6267 case OMPD_flush:
6268 case OMPD_depobj:
6269 case OMPD_scan:
6270 case OMPD_teams:
6271 case OMPD_target_data:
6272 case OMPD_target_exit_data:
6273 case OMPD_target_enter_data:
6274 case OMPD_distribute:
6275 case OMPD_distribute_simd:
6276 case OMPD_distribute_parallel_for:
6277 case OMPD_distribute_parallel_for_simd:
6278 case OMPD_teams_distribute:
6279 case OMPD_teams_distribute_simd:
6280 case OMPD_teams_distribute_parallel_for:
6281 case OMPD_teams_distribute_parallel_for_simd:
6282 case OMPD_target_update:
6283 case OMPD_declare_simd:
6284 case OMPD_declare_variant:
6285 case OMPD_begin_declare_variant:
6286 case OMPD_end_declare_variant:
6287 case OMPD_declare_target:
6288 case OMPD_end_declare_target:
6289 case OMPD_declare_reduction:
6290 case OMPD_declare_mapper:
6291 case OMPD_taskloop:
6292 case OMPD_taskloop_simd:
6293 case OMPD_master_taskloop:
6294 case OMPD_master_taskloop_simd:
6295 case OMPD_parallel_master_taskloop:
6296 case OMPD_parallel_master_taskloop_simd:
6297 case OMPD_requires:
6298 case OMPD_metadirective:
6299 case OMPD_unknown:
6300 break;
6301 default:
6302     break;
6303   }
6304 llvm_unreachable("Unexpected directive kind.");
6307 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6308 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6309 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6310 "Clauses associated with the teams directive expected to be emitted "
6311 "only for the host!");
6312 CGBuilderTy &Bld = CGF.Builder;
6313 int32_t DefaultNT = -1;
6314 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6315 if (NumTeams != nullptr) {
6316 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6318 switch (DirectiveKind) {
6319 case OMPD_target: {
6320 const auto *CS = D.getInnermostCapturedStmt();
6321 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6322 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6323 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6324 /*IgnoreResultAssign*/ true);
6325 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6326                                /*isSigned=*/true);
6327     }
6328 case OMPD_target_teams:
6329 case OMPD_target_teams_distribute:
6330 case OMPD_target_teams_distribute_simd:
6331 case OMPD_target_teams_distribute_parallel_for:
6332 case OMPD_target_teams_distribute_parallel_for_simd: {
6333 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6334 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6335 /*IgnoreResultAssign*/ true);
6336 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6337                                /*isSigned=*/true);
6338     }
6339 default:
6340       break;
6341     }
6342   }
6344   return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
6345 }
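/// Computes the number of threads implied by a 'parallel' region nested
/// directly inside \p CS, combining its 'if' and 'num_threads' clauses with
/// \p DefaultThreadLimitVal; falls back to \p DefaultThreadLimitVal when no
/// such region is found.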
6347 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6348 llvm::Value *DefaultThreadLimitVal) {
6349 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6350 CGF.getContext(), CS->getCapturedStmt());
6351 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6352 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6353 llvm::Value *NumThreads = nullptr;
6354 llvm::Value *CondVal = nullptr;
6355       // Handle the if clause: if one is present, the number of threads is
6356       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
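      // For example, '#pragma omp parallel if(c) num_threads(n)' lowers to
      // roughly 'c ? umin(n, thread_limit) : 1' once the default thread limit
      // is applied.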
6357 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6358 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6359 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6360 const OMPIfClause *IfClause = nullptr;
6361 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6362 if (C->getNameModifier() == OMPD_unknown ||
6363 C->getNameModifier() == OMPD_parallel) {
6364 IfClause = C;
6365             break;
6366           }
6367         }
6368 if (IfClause) {
6369 const Expr *Cond = IfClause->getCondition();
6370 bool Result;
6371 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6372 if (!Result)
6373 return CGF.Builder.getInt32(1);
6374 } else {
6375 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6376 if (const auto *PreInit =
6377 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6378 for (const auto *I : PreInit->decls()) {
6379 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6380 CGF.EmitVarDecl(cast<VarDecl>(*I));
6381 } else {
6382 CodeGenFunction::AutoVarEmission Emission =
6383 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6384                     CGF.EmitAutoVarCleanups(Emission);
6385                   }
6386                 }
6387               }
6388               CondVal = CGF.EvaluateExprAsBool(Cond);
6389             }
6390           }
6391         }
6392       // Check the value of the num_threads clause only if the if clause was
6393       // not specified or does not evaluate to false.
6394 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6395 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6396 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6397 const auto *NumThreadsClause =
6398 Dir->getSingleClause<OMPNumThreadsClause>();
6399 CodeGenFunction::LexicalScope Scope(
6400 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6401 if (const auto *PreInit =
6402 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6403 for (const auto *I : PreInit->decls()) {
6404 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6405 CGF.EmitVarDecl(cast<VarDecl>(*I));
6406 } else {
6407 CodeGenFunction::AutoVarEmission Emission =
6408 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6409               CGF.EmitAutoVarCleanups(Emission);
6410             }
6411           }
6412         }
6413 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6414 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6415 /*isSigned=*/false);
6416 if (DefaultThreadLimitVal)
6417 NumThreads = CGF.Builder.CreateSelect(
6418 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6419 DefaultThreadLimitVal, NumThreads);
6420 } else {
6421 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6422                                          : CGF.Builder.getInt32(0);
6423       }
6424 // Process condition of the if clause.
6425 if (CondVal) {
6426 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6427                                               CGF.Builder.getInt32(1));
6428       }
6429       return NumThreads;
6430     }
6431 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6432       return CGF.Builder.getInt32(1);
6433   }
6434   return DefaultThreadLimitVal;
6435 }
6437 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6438 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6439 int32_t &DefaultVal) {
6440 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6441 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6442 "Expected target-based executable directive.");
6444 switch (DirectiveKind) {
6445 case OMPD_target:
6446     // A plain 'target' construct carries no thread_limit clause.
6447 return nullptr;
6448 case OMPD_target_teams:
6449 case OMPD_target_teams_distribute:
6450 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6451 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6452 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6453 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6454 if (auto Constant =
6455 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6456 DefaultVal = Constant->getExtValue();
6457       return ThreadLimit;
6458     }
6459 return nullptr;
6460 case OMPD_target_parallel:
6461 case OMPD_target_parallel_for:
6462 case OMPD_target_parallel_for_simd:
6463 case OMPD_target_teams_distribute_parallel_for:
6464 case OMPD_target_teams_distribute_parallel_for_simd: {
6465 Expr *ThreadLimit = nullptr;
6466 Expr *NumThreads = nullptr;
6467 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6468 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6469 ThreadLimit = ThreadLimitClause->getThreadLimit();
6470 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6471 if (auto Constant =
6472 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6473           DefaultVal = Constant->getExtValue();
6474     }
6475 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6476 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6477 NumThreads = NumThreadsClause->getNumThreads();
6478 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6479 if (auto Constant =
6480 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6481 if (Constant->getExtValue() < DefaultVal) {
6482 DefaultVal = Constant->getExtValue();
6483             ThreadLimit = NumThreads;
6484           }
6485         }
6486       }
6487     }
6488     return ThreadLimit;
6489   }
6490 case OMPD_target_teams_distribute_simd:
6491 case OMPD_target_simd:
6492 DefaultVal = 1;
6493 return nullptr;
6494 case OMPD_parallel:
6495 case OMPD_for:
6496 case OMPD_parallel_for:
6497 case OMPD_parallel_master:
6498 case OMPD_parallel_sections:
6499 case OMPD_for_simd:
6500 case OMPD_parallel_for_simd:
6501 case OMPD_cancel:
6502 case OMPD_cancellation_point:
6503 case OMPD_ordered:
6504 case OMPD_threadprivate:
6505 case OMPD_allocate:
6506 case OMPD_task:
6507 case OMPD_simd:
6508 case OMPD_tile:
6509 case OMPD_unroll:
6510 case OMPD_sections:
6511 case OMPD_section:
6512 case OMPD_single:
6513 case OMPD_master:
6514 case OMPD_critical:
6515 case OMPD_taskyield:
6516 case OMPD_barrier:
6517 case OMPD_taskwait:
6518 case OMPD_taskgroup:
6519 case OMPD_atomic:
6520 case OMPD_flush:
6521 case OMPD_depobj:
6522 case OMPD_scan:
6523 case OMPD_teams:
6524 case OMPD_target_data:
6525 case OMPD_target_exit_data:
6526 case OMPD_target_enter_data:
6527 case OMPD_distribute:
6528 case OMPD_distribute_simd:
6529 case OMPD_distribute_parallel_for:
6530 case OMPD_distribute_parallel_for_simd:
6531 case OMPD_teams_distribute:
6532 case OMPD_teams_distribute_simd:
6533 case OMPD_teams_distribute_parallel_for:
6534 case OMPD_teams_distribute_parallel_for_simd:
6535 case OMPD_target_update:
6536 case OMPD_declare_simd:
6537 case OMPD_declare_variant:
6538 case OMPD_begin_declare_variant:
6539 case OMPD_end_declare_variant:
6540 case OMPD_declare_target:
6541 case OMPD_end_declare_target:
6542 case OMPD_declare_reduction:
6543 case OMPD_declare_mapper:
6544 case OMPD_taskloop:
6545 case OMPD_taskloop_simd:
6546 case OMPD_master_taskloop:
6547 case OMPD_master_taskloop_simd:
6548 case OMPD_parallel_master_taskloop:
6549 case OMPD_parallel_master_taskloop_simd:
6550 case OMPD_requires:
6551 case OMPD_unknown:
6552 break;
6553 default:
6554     break;
6555   }
6556 llvm_unreachable("Unsupported directive kind.");
6559 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6560 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6561 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6562 "Clauses associated with the teams directive expected to be emitted "
6563 "only for the host!");
6564 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6565 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6566 "Expected target-based executable directive.");
6567 CGBuilderTy &Bld = CGF.Builder;
6568 llvm::Value *ThreadLimitVal = nullptr;
6569 llvm::Value *NumThreadsVal = nullptr;
6570 switch (DirectiveKind) {
6571 case OMPD_target: {
6572 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6573 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6574 return NumThreads;
6575 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6576 CGF.getContext(), CS->getCapturedStmt());
6577 // TODO: The standard is not clear how to resolve two thread limit clauses,
6578 // let's pick the teams one if it's present, otherwise the target one.
6579 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6580 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6581 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6582 ThreadLimitClause = TLC;
6583 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6584 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6585 CodeGenFunction::LexicalScope Scope(
6586 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6587 if (const auto *PreInit =
6588 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6589 for (const auto *I : PreInit->decls()) {
6590 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6591 CGF.EmitVarDecl(cast<VarDecl>(*I));
6592 } else {
6593 CodeGenFunction::AutoVarEmission Emission =
6594 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6595               CGF.EmitAutoVarCleanups(Emission);
6596             }
6597           }
6598         }
6599       }
6600     }
6601 if (ThreadLimitClause) {
6602 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6603 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6604 ThreadLimitVal =
6605           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6606     }
6607 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6608 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6609 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6610 CS = Dir->getInnermostCapturedStmt();
6611 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6612 CGF.getContext(), CS->getCapturedStmt());
6613         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6614       }
6615 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6616 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6617 CS = Dir->getInnermostCapturedStmt();
6618 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6619           return NumThreads;
6620       }
6621 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6622 return Bld.getInt32(1);
6624     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6625   }
6626 case OMPD_target_teams: {
6627 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6628 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6629 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6630 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6631 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6632 ThreadLimitVal =
6633           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6634     }
6635 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6636 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6637 return NumThreads;
6638 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6639 CGF.getContext(), CS->getCapturedStmt());
6640 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6641 if (Dir->getDirectiveKind() == OMPD_distribute) {
6642 CS = Dir->getInnermostCapturedStmt();
6643 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6644           return NumThreads;
6645       }
6646     }
6647     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6648   }
6649 case OMPD_target_teams_distribute:
6650 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6651 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6652 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6653 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6654 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6655 ThreadLimitVal =
6656           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6657     }
6658 if (llvm::Value *NumThreads =
6659 getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
6660 return NumThreads;
6661 return Bld.getInt32(0);
6662 case OMPD_target_parallel:
6663 case OMPD_target_parallel_for:
6664 case OMPD_target_parallel_for_simd:
6665 case OMPD_target_teams_distribute_parallel_for:
6666 case OMPD_target_teams_distribute_parallel_for_simd: {
6667 llvm::Value *CondVal = nullptr;
6668     // Handle the if clause: if one is present, the number of threads is
6669     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6670 if (D.hasClausesOfKind<OMPIfClause>()) {
6671 const OMPIfClause *IfClause = nullptr;
6672 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6673 if (C->getNameModifier() == OMPD_unknown ||
6674 C->getNameModifier() == OMPD_parallel) {
6675 IfClause = C;
6676           break;
6677         }
6678       }
6679 if (IfClause) {
6680 const Expr *Cond = IfClause->getCondition();
6681 bool Result;
6682 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6683 if (!Result)
6684 return Bld.getInt32(1);
6685 } else {
6686 CodeGenFunction::RunCleanupsScope Scope(CGF);
6687           CondVal = CGF.EvaluateExprAsBool(Cond);
6688         }
6689       }
6690     }
6691 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6692 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6693 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6694 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6695 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6696 ThreadLimitVal =
6697           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6698     }
6699 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6700 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6701 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6702 llvm::Value *NumThreads = CGF.EmitScalarExpr(
6703 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6704 NumThreadsVal =
6705 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6706 ThreadLimitVal = ThreadLimitVal
6707 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6708 ThreadLimitVal),
6709 NumThreadsVal, ThreadLimitVal)
6710                            : NumThreadsVal;
6711     }
6712 if (!ThreadLimitVal)
6713 ThreadLimitVal = Bld.getInt32(0);
6714 if (CondVal)
6715 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6716     return ThreadLimitVal;
6717   }
6718 case OMPD_target_teams_distribute_simd:
6719 case OMPD_target_simd:
6720 return Bld.getInt32(1);
6721 case OMPD_parallel:
6722 case OMPD_for:
6723 case OMPD_parallel_for:
6724 case OMPD_parallel_master:
6725 case OMPD_parallel_sections:
6726 case OMPD_for_simd:
6727 case OMPD_parallel_for_simd:
6728 case OMPD_cancel:
6729 case OMPD_cancellation_point:
6730 case OMPD_ordered:
6731 case OMPD_threadprivate:
6732 case OMPD_allocate:
6733 case OMPD_task:
6734 case OMPD_simd:
6735 case OMPD_tile:
6736 case OMPD_unroll:
6737 case OMPD_sections:
6738 case OMPD_section:
6739 case OMPD_single:
6740 case OMPD_master:
6741 case OMPD_critical:
6742 case OMPD_taskyield:
6743 case OMPD_barrier:
6744 case OMPD_taskwait:
6745 case OMPD_taskgroup:
6746 case OMPD_atomic:
6747 case OMPD_flush:
6748 case OMPD_depobj:
6749 case OMPD_scan:
6750 case OMPD_teams:
6751 case OMPD_target_data:
6752 case OMPD_target_exit_data:
6753 case OMPD_target_enter_data:
6754 case OMPD_distribute:
6755 case OMPD_distribute_simd:
6756 case OMPD_distribute_parallel_for:
6757 case OMPD_distribute_parallel_for_simd:
6758 case OMPD_teams_distribute:
6759 case OMPD_teams_distribute_simd:
6760 case OMPD_teams_distribute_parallel_for:
6761 case OMPD_teams_distribute_parallel_for_simd:
6762 case OMPD_target_update:
6763 case OMPD_declare_simd:
6764 case OMPD_declare_variant:
6765 case OMPD_begin_declare_variant:
6766 case OMPD_end_declare_variant:
6767 case OMPD_declare_target:
6768 case OMPD_end_declare_target:
6769 case OMPD_declare_reduction:
6770 case OMPD_declare_mapper:
6771 case OMPD_taskloop:
6772 case OMPD_taskloop_simd:
6773 case OMPD_master_taskloop:
6774 case OMPD_master_taskloop_simd:
6775 case OMPD_parallel_master_taskloop:
6776 case OMPD_parallel_master_taskloop_simd:
6777 case OMPD_requires:
6778 case OMPD_metadirective:
6779 case OMPD_unknown:
6780 break;
6781 default:
6782     break;
6783   }
6784 llvm_unreachable("Unsupported directive kind.");
6787 namespace {
6788 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6790 // Utility to handle information from clauses associated with a given
6791 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6792 // It provides a convenient interface to obtain the information and generate
6793 // code for that information.
6794 class MappableExprsHandler {
6795 public:
6796 /// Get the offset of the OMP_MAP_MEMBER_OF field.
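  /// For instance, with OMP_MAP_MEMBER_OF occupying the high 16 bits of a
  /// 64-bit flag word (0xFFFF000000000000), the returned offset is 48.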
6797 static unsigned getFlagMemberOffset() {
6798 unsigned Offset = 0;
6799 for (uint64_t Remain =
6800 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6801 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6802 !(Remain & 1); Remain = Remain >> 1)
6803 Offset++;
6804     return Offset;
6805   }
6807 /// Class that holds debugging information for a data mapping to be passed to
6808 /// the runtime library.
6809 class MappingExprInfo {
6810 /// The variable declaration used for the data mapping.
6811 const ValueDecl *MapDecl = nullptr;
6812 /// The original expression used in the map clause, or null if there is
6813 /// none.
6814 const Expr *MapExpr = nullptr;
6816 public:
6817 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6818 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6820 const ValueDecl *getMapDecl() const { return MapDecl; }
6821     const Expr *getMapExpr() const { return MapExpr; }
6822   };
6824 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6825 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6826 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6827 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6828 using MapNonContiguousArrayTy =
6829 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6830 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6831 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6833 /// This structure contains combined information generated for mappable
6834 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6835 /// mappers, and non-contiguous information.
6836 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6837 MapExprsArrayTy Exprs;
6838 MapValueDeclsArrayTy Mappers;
6839 MapValueDeclsArrayTy DevicePtrDecls;
6841 /// Append arrays in \a CurInfo.
6842 void append(MapCombinedInfoTy &CurInfo) {
6843 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6844 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6845 CurInfo.DevicePtrDecls.end());
6846 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6847       llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6848     }
6849   };
6851   /// Map between a struct and its lowest & highest elements which have been
6852 /// mapped.
6853 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6854 /// HE(FieldIndex, Pointer)}
6855 struct StructRangeInfoTy {
6856 MapCombinedInfoTy PreliminaryMapData;
6857 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6858 0, Address::invalid()};
6859 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6860 0, Address::invalid()};
6861 Address Base = Address::invalid();
6862 Address LB = Address::invalid();
6863 bool IsArraySection = false;
6864     bool HasCompleteRecord = false;
6865   };
6867 private:
6868   /// Information gathered for a single mappable expression component list.
6869 struct MapInfo {
6870 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6871 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6872 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6873 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6874 bool ReturnDevicePointer = false;
6875 bool IsImplicit = false;
6876 const ValueDecl *Mapper = nullptr;
6877 const Expr *VarRef = nullptr;
6878 bool ForDeviceAddr = false;
6880 MapInfo() = default;
6881 MapInfo(
6882 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6883 OpenMPMapClauseKind MapType,
6884 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6885 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6886 bool ReturnDevicePointer, bool IsImplicit,
6887 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6888 bool ForDeviceAddr = false)
6889 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6890 MotionModifiers(MotionModifiers),
6891 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6892           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6893   };
6895 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6896 /// member and there is no map information about it, then emission of that
6897 /// entry is deferred until the whole struct has been processed.
6898 struct DeferredDevicePtrEntryTy {
6899 const Expr *IE = nullptr;
6900 const ValueDecl *VD = nullptr;
6901 bool ForDeviceAddr = false;
6903 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6904 bool ForDeviceAddr)
6905         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6906   };
6908   /// The target directive from which the mappable clauses were extracted. It
6909   /// is either an executable directive or a user-defined mapper directive.
6910 llvm::PointerUnion<const OMPExecutableDirective *,
6911 const OMPDeclareMapperDecl *>
6912 CurDir;
6914 /// Function the directive is being generated for.
6915 CodeGenFunction &CGF;
6917   /// Set of all firstprivate variables in the current directive.
6918 /// bool data is set to true if the variable is implicitly marked as
6919 /// firstprivate, false otherwise.
6920 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6922 /// Map between device pointer declarations and their expression components.
6923 /// The key value for declarations in 'this' is null.
6924 llvm::DenseMap<
6925 const ValueDecl *,
6926 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6927 DevPointersMap;
6929 /// Map between device addr declarations and their expression components.
6930 /// The key value for declarations in 'this' is null.
6931 llvm::DenseMap<
6932 const ValueDecl *,
6933 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6934 HasDevAddrsMap;
6936 /// Map between lambda declarations and their map type.
6937 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6939 llvm::Value *getExprTypeSize(const Expr *E) const {
6940 QualType ExprTy = E->getType().getCanonicalType();
6942 // Calculate the size for array shaping expression.
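    // For example, for a shaping expression '([n][m])p' the size computed
    // below is n * m * sizeof(*p).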
6943 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6944 llvm::Value *Size =
6945 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6946 for (const Expr *SE : OAE->getDimensions()) {
6947 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6948 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6949 CGF.getContext().getSizeType(),
6950 SE->getExprLoc());
6951         Size = CGF.Builder.CreateNUWMul(Size, Sz);
6952       }
6953       return Size;
6954     }
6956 // Reference types are ignored for mapping purposes.
6957 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6958 ExprTy = RefTy->getPointeeType().getCanonicalType();
6960 // Given that an array section is considered a built-in type, we need to
6961 // do the calculation based on the length of the section instead of relying
6962 // on CGF.getTypeSize(E->getType()).
6963 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6964 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6965 OAE->getBase()->IgnoreParenImpCasts())
6966 .getCanonicalType();
6968       // If there is no length associated with the expression and the lower
6969       // bound is not specified either, that means we are using the whole
6970       // length of the base.
6971 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6972 !OAE->getLowerBound())
6973 return CGF.getTypeSize(BaseTy);
6975 llvm::Value *ElemSize;
6976 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6977 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6978 } else {
6979 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6980 assert(ATy && "Expecting array type if not a pointer type.");
6981         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6982       }
6984 // If we don't have a length at this point, that is because we have an
6985 // array section with a single element.
6986 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6987 return ElemSize;
6989 if (const Expr *LenExpr = OAE->getLength()) {
6990 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6991 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6992 CGF.getContext().getSizeType(),
6993 LenExpr->getExprLoc());
6994         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6995       }
6996 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6997 OAE->getLowerBound() && "expected array_section[lb:].");
6998       // Size = sizeof(base) - lb * sizeof(element);
6999 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7000 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7001 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7002 CGF.getContext().getSizeType(),
7003 OAE->getLowerBound()->getExprLoc());
7004 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7005 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7006 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7007 LengthVal = CGF.Builder.CreateSelect(
7008 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7009       return LengthVal;
7010     }
7011     return CGF.getTypeSize(ExprTy);
7012   }
7014 /// Return the corresponding bits for a given map clause modifier. Add
7015 /// a flag marking the map as a pointer if requested. Add a flag marking the
7016 /// map as the first one of a series of maps that relate to the same map
7017 /// expression.
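  /// For example, 'map(always, close, tofrom: x)' yields OMP_MAP_TO |
  /// OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE, before any pointer or
  /// target-param flags are added below.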
7018 OpenMPOffloadMappingFlags getMapTypeBits(
7019 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7020 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7021 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7022 OpenMPOffloadMappingFlags Bits =
7023 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7024 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7025 switch (MapType) {
7026 case OMPC_MAP_alloc:
7027 case OMPC_MAP_release:
7028     // alloc and release are the default behavior in the runtime library, i.e.
7029     // if we don't pass any bits, alloc/release is what the runtime is going
7030     // to do. Therefore, we don't need to signal anything for these two type
7031     // modifiers.
7032 break;
7033 case OMPC_MAP_to:
7034 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7035 break;
7036 case OMPC_MAP_from:
7037 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7038 break;
7039 case OMPC_MAP_tofrom:
7040 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7041 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7042 break;
7043 case OMPC_MAP_delete:
7044 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7045 break;
7046 case OMPC_MAP_unknown:
7047 llvm_unreachable("Unexpected map type!");
7049 if (AddPtrFlag)
7050 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7051 if (AddIsTargetParamFlag)
7052 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7053 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7054 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7055 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7056 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7057 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7058 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7059 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7060 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7061 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7062 if (IsNonContiguous)
7063 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7064     return Bits;
7065   }
7067 /// Return true if the provided expression is a final array section. A
7068   /// final array section is one whose length can't be proved to be one.
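  /// For example, 'a[3:2]' is a final array section, while 'a[3:1]' (provably
  /// of length one) is not.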
7069 bool isFinalArraySectionExpression(const Expr *E) const {
7070 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7072 // It is not an array section and therefore not a unity-size one.
7073 if (!OASE)
7074 return false;
7076 // An array section with no colon always refers to a single element.
7077 if (OASE->getColonLocFirst().isInvalid())
7078 return false;
7080 const Expr *Length = OASE->getLength();
7082 // If we don't have a length we have to check if the array has size 1
7083 // for this dimension. Also, we should always expect a length if the
7084 // base type is a pointer.
7085 if (!Length) {
7086 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7087 OASE->getBase()->IgnoreParenImpCasts())
7088 .getCanonicalType();
7089 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7090 return ATy->getSize().getSExtValue() != 1;
7091 // If we don't have a constant dimension length, we have to consider
7092 // the current section as having any size, so it is not necessarily
7093 // unitary. If it happens to be unity size, that's the user's fault.
7094 return true;
7097 // Check if the length evaluates to 1.
7098 Expr::EvalResult Result;
7099 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7100 return true; // Can have more than size 1.
7102 llvm::APSInt ConstLength = Result.Val.getInt();
7103 return ConstLength.getSExtValue() != 1;
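  // E.g., for "int a[10]", the section "a[0:1]" has a provable length of one
  // and is not final, whereas "a[0:n]" (unknown length) and "a[2:]" (length
  // defaults to more than one element) are treated as final array sections.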
7106 /// Generate the base pointers, section pointers, sizes, map type bits, and
7107 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7108 /// map type, map or motion modifiers, and expression components.
7109 /// \a IsFirstComponentList should be set to true if the provided set of
7110 /// components is the first associated with a capture.
7111 void generateInfoForComponentList(
7112 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7113 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7114 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7115 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7116 bool IsFirstComponentList, bool IsImplicit,
7117 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7118 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7119 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7120 OverlappedElements = std::nullopt) const {
7121 // The following summarizes what has to be generated for each map and the
7122 // types below. The generated information is expressed in this order:
7123 // base pointer, section pointer, size, flags
7124 // (to add to the ones that come from the map type and modifier).
7126 // double d;
7127 // int i[100];
7128 // float *p;
7129 // int **a = &i;
7131 // struct S1 {
7132 // int i;
7133 // float f[50];
7134 // }
7135 // struct S2 {
7136 // int i;
7137 // float f[50];
7138 // S1 s;
7139 // double *p;
7140 // struct S2 *ps;
7141 // int &ref;
7142 // }
7143 // S2 s;
7144 // S2 *ps;
7146 // map(d)
7147 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7149 // map(i)
7150 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7152 // map(i[1:23])
7153 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7155 // map(p)
7156 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7158 // map(p[1:24])
7159 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7160 // in unified shared memory mode or for local pointers
7161 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7163 // map((*a)[0:3])
7164 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7165 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7167 // map(**a)
7168 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7169 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7171 // map(s)
7172 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7174 // map(s.i)
7175 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7177 // map(s.s.f)
7178 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7180 // map(s.p)
7181 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7183 // map(to: s.p[:22])
7184 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7185 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7186 // &(s.p), &(s.p[0]), 22*sizeof(double),
7187 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7188 // (*) alloc space for struct members, only this is a target parameter
7189 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7190 // optimizes this entry out, same in the examples below)
7191 // (***) map the pointee (map: to)
7193 // map(to: s.ref)
7194 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7195 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7196 // (*) alloc space for struct members, only this is a target parameter
7197 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7198 // optimizes this entry out, same in the examples below)
7199 // (***) map the pointee (map: to)
7201 // map(s.ps)
7202 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7204 // map(from: s.ps->s.i)
7205 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7206 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7207 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7209 // map(to: s.ps->ps)
7210 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7211 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7212 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7214 // map(s.ps->ps->ps)
7215 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7216 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7217 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7218 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7220 // map(to: s.ps->ps->s.f[:22])
7221 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7222 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7223 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7224 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7226 // map(ps)
7227 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7229 // map(ps->i)
7230 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7232 // map(ps->s.f)
7233 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7235 // map(from: ps->p)
7236 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7238 // map(to: ps->p[:22])
7239 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7240 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7241 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7243 // map(ps->ps)
7244 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7246 // map(from: ps->ps->s.i)
7247 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7248 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7249 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7251 // map(from: ps->ps->ps)
7252 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7253 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7254 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7256 // map(ps->ps->ps->ps)
7257 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7258 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7259 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7260 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7262 // map(to: ps->ps->ps->s.f[:22])
7263 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7264 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7265 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7266 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7268 // map(to: s.f[:22]) map(from: s.p[:33])
7269 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7270 // sizeof(double*) (**), TARGET_PARAM
7271 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7272 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7273 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7274 // (**) allocate contiguous space needed to fit all mapped members even if
7275 // we allocate space for members not mapped (in this example,
7276 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7277 // them as well because they fall between &s.f[0] and &s.p)
7279 // map(from: s.f[:22]) map(to: ps->p[:33])
7280 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7281 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7282 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7283 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7284 // (*) the struct this entry pertains to is the 2nd element in the list of
7285 // arguments, hence MEMBER_OF(2)
7287 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7288 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7289 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7290 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7291 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7292 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7293 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7294 // (*) the struct this entry pertains to is the 4th element in the list
7295 // of arguments, hence MEMBER_OF(4)
7297 // Track if the map information being generated is the first for a capture.
7298 bool IsCaptureFirstInfo = IsFirstComponentList;
7299 // When the variable appears in a 'declare target link' clause, or in a
7300 // 'to' clause with unified shared memory, a reference is needed to hold
7301 // the host/device address of the variable.
7302 bool RequiresReference = false;
7304 // Scan the components from the base to the complete expression.
7305 auto CI = Components.rbegin();
7306 auto CE = Components.rend();
7307 auto I = CI;
7309 // Track if the map information being generated is the first for a list of
7310 // components.
7311 bool IsExpressionFirstInfo = true;
7312 bool FirstPointerInComplexData = false;
7313 Address BP = Address::invalid();
7314 const Expr *AssocExpr = I->getAssociatedExpression();
7315 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7316 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7317 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7319 if (isa<MemberExpr>(AssocExpr)) {
7320 // The base is the 'this' pointer. The content of the pointer is going
7321 // to be the base of the field being mapped.
7322 BP = CGF.LoadCXXThisAddress();
7323 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7324 (OASE &&
7325 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7326 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7327 } else if (OAShE &&
7328 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7329 BP = Address(
7330 CGF.EmitScalarExpr(OAShE->getBase()),
7331 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7332 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7333 } else {
7334 // The base is the reference to the variable.
7335 // BP = &Var.
7336 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7337 if (const auto *VD =
7338 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7339 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7340 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7341 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7342 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7343 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7344 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7345 RequiresReference = true;
7346 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7351 // If the variable is a pointer and is being dereferenced (i.e. is not
7352 // the last component), the base has to be the pointer itself, not its
7353 // reference. References are ignored for mapping purposes.
7354 QualType Ty =
7355 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7356 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7357 // No need to generate individual map information for the pointer, it
7358 // can be associated with the combined storage if shared memory mode is
7359 // active or the base declaration is not a global variable.
7360 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7361 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7362 !VD || VD->hasLocalStorage())
7363 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7364 else
7365 FirstPointerInComplexData = true;
7366 ++I;
7370 // Track whether a component of the list should be marked as MEMBER_OF some
7371 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7372 // in a component list should be marked as MEMBER_OF; all subsequent entries
7373 // do not belong to the base struct. E.g.
7374 // struct S2 s;
7375 // s.ps->ps->ps->f[:]
7376 // (1) (2) (3) (4)
7377 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7378 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7379 // is the pointee of ps(2), which is not a member of struct s, so it should not
7380 // be marked as such (it is still PTR_AND_OBJ).
7381 // The variable is initialized to false so that PTR_AND_OBJ entries which
7382 // are not struct members are not considered (e.g. array of pointers to
7383 // data).
7384 bool ShouldBeMemberOf = false;
7386 // Variable keeping track of whether or not we have encountered a component
7387 // in the component list which is a member expression. Useful when we have a
7388 // pointer or a final array section, in which case it is the previous
7389 // component in the list which tells us whether we have a member expression.
7390 // E.g. X.f[:]
7391 // While processing the final array section "[:]" it is "f" which tells us
7392 // whether we are dealing with a member of a declared struct.
7393 const MemberExpr *EncounteredME = nullptr;
7395 // Track the total number of dimensions. Start from one for the dummy
7396 // dimension.
7397 uint64_t DimSize = 1;
7399 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7400 bool IsPrevMemberReference = false;
7402 for (; I != CE; ++I) {
7403 // If the current component is member of a struct (parent struct) mark it.
7404 if (!EncounteredME) {
7405 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7406 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7407 // as MEMBER_OF the parent struct.
7408 if (EncounteredME) {
7409 ShouldBeMemberOf = true;
7410 // Do not emit as a complex pointer if this is actually not an
7411 // array-like expression.
7412 if (FirstPointerInComplexData) {
7413 QualType Ty = std::prev(I)
7414 ->getAssociatedDeclaration()
7415 ->getType()
7416 .getNonReferenceType();
7417 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7418 FirstPointerInComplexData = false;
7423 auto Next = std::next(I);
7425 // We need to generate the addresses and sizes if this is the last
7426 // component, if the component is a pointer or if it is an array section
7427 // whose length can't be proven to be one. If this is a pointer, it
7428 // becomes the base address for the following components.
7430 // A final array section is one whose length can't be proven to be one.
7431 // If the map item is non-contiguous then we don't treat any array section
7432 // as final array section.
7433 bool IsFinalArraySection =
7434 !IsNonContiguous &&
7435 isFinalArraySectionExpression(I->getAssociatedExpression());
7437 // If we have a declaration for the mapping, use that; otherwise use
7438 // the base declaration of the map clause.
7439 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7440 ? I->getAssociatedDeclaration()
7441 : BaseDecl;
7442 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7443 : MapExpr;
7445 // Get information on whether the element is a pointer. Array sections
7446 // need special treatment given that they are built-in
7447 // types.
7448 const auto *OASE =
7449 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7450 const auto *OAShE =
7451 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7452 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7453 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7454 bool IsPointer =
7455 OAShE ||
7456 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7457 .getCanonicalType()
7458 ->isAnyPointerType()) ||
7459 I->getAssociatedExpression()->getType()->isAnyPointerType();
7460 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7461 MapDecl &&
7462 MapDecl->getType()->isLValueReferenceType();
7463 bool IsNonDerefPointer = IsPointer &&
7464 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7465 !IsNonContiguous;
7467 if (OASE)
7468 ++DimSize;
7470 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7471 IsFinalArraySection) {
7472 // If this is not the last component, we expect the pointer to be
7473 // associated with an array expression or member expression.
7474 assert((Next == CE ||
7475 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7476 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7477 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7478 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7479 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7480 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7481 "Unexpected expression");
7483 Address LB = Address::invalid();
7484 Address LowestElem = Address::invalid();
7485 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7486 const MemberExpr *E) {
7487 const Expr *BaseExpr = E->getBase();
7488 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7489 // scalar.
7490 LValue BaseLV;
7491 if (E->isArrow()) {
7492 LValueBaseInfo BaseInfo;
7493 TBAAAccessInfo TBAAInfo;
7494 Address Addr =
7495 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7496 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7497 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7498 } else {
7499 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7501 return BaseLV;
7503 if (OAShE) {
7504 LowestElem = LB =
7505 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7506 CGF.ConvertTypeForMem(
7507 OAShE->getBase()->getType()->getPointeeType()),
7508 CGF.getContext().getTypeAlignInChars(
7509 OAShE->getBase()->getType()));
7510 } else if (IsMemberReference) {
7511 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7512 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7513 LowestElem = CGF.EmitLValueForFieldInitialization(
7514 BaseLVal, cast<FieldDecl>(MapDecl))
7515 .getAddress(CGF);
7516 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7517 .getAddress(CGF);
7518 } else {
7519 LowestElem = LB =
7520 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7521 .getAddress(CGF);
7524 // If this component is a pointer inside the base struct then we don't
7525 // need to create any entry for it - it will be combined with the object
7526 // it is pointing to into a single PTR_AND_OBJ entry.
7527 bool IsMemberPointerOrAddr =
7528 EncounteredME &&
7529 (((IsPointer || ForDeviceAddr) &&
7530 I->getAssociatedExpression() == EncounteredME) ||
7531 (IsPrevMemberReference && !IsPointer) ||
7532 (IsMemberReference && Next != CE &&
7533 !Next->getAssociatedExpression()->getType()->isPointerType()));
7534 if (!OverlappedElements.empty() && Next == CE) {
7535 // Handle base element with the info for overlapped elements.
7536 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7537 assert(!IsPointer &&
7538 "Unexpected base element with the pointer type.");
7539 // Mark the whole struct as the struct that requires allocation on the
7540 // device.
7541 PartialStruct.LowestElem = {0, LowestElem};
7542 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7543 I->getAssociatedExpression()->getType());
7544 Address HB = CGF.Builder.CreateConstGEP(
7545 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7546 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7547 TypeSize.getQuantity() - 1);
7548 PartialStruct.HighestElem = {
7549 std::numeric_limits<decltype(
7550 PartialStruct.HighestElem.first)>::max(),
7551 HB};
7552 PartialStruct.Base = BP;
7553 PartialStruct.LB = LB;
7554 assert(
7555 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7556 "Overlapped elements must be used only once for the variable.");
7557 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7558 // Emit entries for the non-overlapped data.
7559 OpenMPOffloadMappingFlags Flags =
7560 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7561 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7562 /*AddPtrFlag=*/false,
7563 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7564 llvm::Value *Size = nullptr;
7565 // Do a bitcopy of all non-overlapped structure elements.
7566 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7567 Component : OverlappedElements) {
7568 Address ComponentLB = Address::invalid();
7569 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7570 Component) {
7571 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7572 const auto *FD = dyn_cast<FieldDecl>(VD);
7573 if (FD && FD->getType()->isLValueReferenceType()) {
7574 const auto *ME =
7575 cast<MemberExpr>(MC.getAssociatedExpression());
7576 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7577 ComponentLB =
7578 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7579 .getAddress(CGF);
7580 } else {
7581 ComponentLB =
7582 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7583 .getAddress(CGF);
7585 Size = CGF.Builder.CreatePtrDiff(
7586 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7587 CGF.EmitCastToVoidPtr(LB.getPointer()));
7588 break;
7591 assert(Size && "Failed to determine structure size");
7592 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7593 CombinedInfo.BasePointers.push_back(BP.getPointer());
7594 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7595 CombinedInfo.Pointers.push_back(LB.getPointer());
7596 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7597 Size, CGF.Int64Ty, /*isSigned=*/true));
7598 CombinedInfo.Types.push_back(Flags);
7599 CombinedInfo.Mappers.push_back(nullptr);
7600 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7601 : 1);
7602 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7604 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7605 CombinedInfo.BasePointers.push_back(BP.getPointer());
7606 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7607 CombinedInfo.Pointers.push_back(LB.getPointer());
7608 Size = CGF.Builder.CreatePtrDiff(
7609 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7610 CGF.EmitCastToVoidPtr(LB.getPointer()));
7611 CombinedInfo.Sizes.push_back(
7612 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7613 CombinedInfo.Types.push_back(Flags);
7614 CombinedInfo.Mappers.push_back(nullptr);
7615 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7616 : 1);
7617 break;
7619 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7620 if (!IsMemberPointerOrAddr ||
7621 (Next == CE && MapType != OMPC_MAP_unknown)) {
7622 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7623 CombinedInfo.BasePointers.push_back(BP.getPointer());
7624 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7625 CombinedInfo.Pointers.push_back(LB.getPointer());
7626 CombinedInfo.Sizes.push_back(
7627 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7628 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7629 : 1);
7631 // If Mapper is valid, the last component inherits the mapper.
7632 bool HasMapper = Mapper && Next == CE;
7633 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7635 // We need to add a pointer flag for each map that comes from the
7636 // same expression except for the first one. We also need to signal
7637 // this map is the first one that relates to the current capture
7638 // (there is a set of entries for each capture).
7639 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7640 MapType, MapModifiers, MotionModifiers, IsImplicit,
7641 !IsExpressionFirstInfo || RequiresReference ||
7642 FirstPointerInComplexData || IsMemberReference,
7643 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7645 if (!IsExpressionFirstInfo || IsMemberReference) {
7646 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7647 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7648 if (IsPointer || (IsMemberReference && Next != CE))
7649 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7650 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7651 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7652 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7653 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7655 if (ShouldBeMemberOf) {
7656 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7657 // should be later updated with the correct value of MEMBER_OF.
7658 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7659 // From now on, all subsequent PTR_AND_OBJ entries should not be
7660 // marked as MEMBER_OF.
7661 ShouldBeMemberOf = false;
7665 CombinedInfo.Types.push_back(Flags);
7668 // If we have encountered a member expression so far, keep track of the
7669 // mapped member. If the parent is "*this", then the value declaration
7670 // is nullptr.
7671 if (EncounteredME) {
7672 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7673 unsigned FieldIndex = FD->getFieldIndex();
7675 // Update info about the lowest and highest elements for this struct
7676 if (!PartialStruct.Base.isValid()) {
7677 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7678 if (IsFinalArraySection) {
7679 Address HB =
7680 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7681 .getAddress(CGF);
7682 PartialStruct.HighestElem = {FieldIndex, HB};
7683 } else {
7684 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7686 PartialStruct.Base = BP;
7687 PartialStruct.LB = BP;
7688 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7689 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7690 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7691 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7695 // Need to emit combined struct for array sections.
7696 if (IsFinalArraySection || IsNonContiguous)
7697 PartialStruct.IsArraySection = true;
7699 // If we have a final array section, we are done with this expression.
7700 if (IsFinalArraySection)
7701 break;
7703 // The pointer becomes the base for the next element.
7704 if (Next != CE)
7705 BP = IsMemberReference ? LowestElem : LB;
7707 IsExpressionFirstInfo = false;
7708 IsCaptureFirstInfo = false;
7709 FirstPointerInComplexData = false;
7710 IsPrevMemberReference = IsMemberReference;
7711 } else if (FirstPointerInComplexData) {
7712 QualType Ty = Components.rbegin()
7713 ->getAssociatedDeclaration()
7714 ->getType()
7715 .getNonReferenceType();
7716 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7717 FirstPointerInComplexData = false;
7720 // If we ran through the whole component list, allocate space for the
7721 // whole record.
7722 if (!EncounteredME)
7723 PartialStruct.HasCompleteRecord = true;
7725 if (!IsNonContiguous)
7726 return;
7728 const ASTContext &Context = CGF.getContext();
7730 // To support stride in an array section, we need to initialize the first
7731 // dimension size as 1, the first offset as 0, and the first count as 1.
7732 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7733 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7734 MapValuesArrayTy CurStrides;
7735 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7736 uint64_t ElementTypeSize;
7738 // Collect Size information for each dimension and get the element size as
7739 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7740 // should be [10, 10] and the first stride is 4 bytes.
7741 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7742 Components) {
7743 const Expr *AssocExpr = Component.getAssociatedExpression();
7744 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7746 if (!OASE)
7747 continue;
7749 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7750 auto *CAT = Context.getAsConstantArrayType(Ty);
7751 auto *VAT = Context.getAsVariableArrayType(Ty);
7753 // We need all the dimension sizes except for the last one.
7754 assert((VAT || CAT || &Component == &*Components.begin()) &&
7755 "Should be either ConstantArray or VariableArray if not the "
7756 "first Component");
7758 // Get element size if CurStrides is empty.
7759 if (CurStrides.empty()) {
7760 const Type *ElementType = nullptr;
7761 if (CAT)
7762 ElementType = CAT->getElementType().getTypePtr();
7763 else if (VAT)
7764 ElementType = VAT->getElementType().getTypePtr();
7765 else
7766 assert(&Component == &*Components.begin() &&
7767 "Only expect pointer (non CAT or VAT) when this is the "
7768 "first Component");
7769 // If ElementType is null, then it means the base is a pointer
7770 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7771 // on the next iteration.
7772 if (ElementType) {
7773 // For the case that having pointer as base, we need to remove one
7774 // level of indirection.
7775 if (&Component != &*Components.begin())
7776 ElementType = ElementType->getPointeeOrArrayElementType();
7777 ElementTypeSize =
7778 Context.getTypeSizeInChars(ElementType).getQuantity();
7779 CurStrides.push_back(
7780 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7783 // Get each dimension's size except for the last one, since we don't
7784 // need it.
7785 if (DimSizes.size() < Components.size() - 1) {
7786 if (CAT)
7787 DimSizes.push_back(llvm::ConstantInt::get(
7788 CGF.Int64Ty, CAT->getSize().getZExtValue()));
7789 else if (VAT)
7790 DimSizes.push_back(CGF.Builder.CreateIntCast(
7791 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7792 /*IsSigned=*/false));
7796 // Skip the dummy dimension since we already have its information.
7797 auto *DI = DimSizes.begin() + 1;
7798 // Running product of the dimension sizes.
7799 llvm::Value *DimProd =
7800 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7802 // Collect the non-contiguous info. Note that offset, count, and stride
7803 // are only meaningful for an array section, so components other than
7804 // array sections are skipped.
7805 // Also, the sizes of the offset, count, and stride lists are not the
7806 // same as those of pointers, base_pointers, sizes, or dims. Instead,
7807 // they match the number of non-contiguous declarations in the target
7808 // update to/from clause.
7809 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7810 Components) {
7811 const Expr *AssocExpr = Component.getAssociatedExpression();
7813 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7814 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7815 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7816 /*isSigned=*/false);
7817 CurOffsets.push_back(Offset);
7818 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7819 CurStrides.push_back(CurStrides.back());
7820 continue;
7823 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7825 if (!OASE)
7826 continue;
7828 // Offset
7829 const Expr *OffsetExpr = OASE->getLowerBound();
7830 llvm::Value *Offset = nullptr;
7831 if (!OffsetExpr) {
7832 // If offset is absent, then we just set it to zero.
7833 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7834 } else {
7835 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7836 CGF.Int64Ty,
7837 /*isSigned=*/false);
7839 CurOffsets.push_back(Offset);
7841 // Count
7842 const Expr *CountExpr = OASE->getLength();
7843 llvm::Value *Count = nullptr;
7844 if (!CountExpr) {
7845 // In Clang, once a higher dimension is an array section, all the lower
7846 // dimensions are constructed as array sections too. However, for a case
7847 // like arr[0:2][2], Clang constructs the inner dimension as an array
7848 // section although, per the spec, it is not actually one.
7849 if (!OASE->getColonLocFirst().isValid() &&
7850 !OASE->getColonLocSecond().isValid()) {
7851 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7852 } else {
7853 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7854 // When the length is absent it defaults to ⌈(size −
7855 // lower-bound)/stride⌉, where size is the size of the array
7856 // dimension.
7857 const Expr *StrideExpr = OASE->getStride();
7858 llvm::Value *Stride =
7859 StrideExpr
7860 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7861 CGF.Int64Ty, /*isSigned=*/false)
7862 : nullptr;
7863 if (Stride)
7864 Count = CGF.Builder.CreateUDiv(
7865 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7866 else
7867 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7869 } else {
7870 Count = CGF.EmitScalarExpr(CountExpr);
7872 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7873 CurCounts.push_back(Count);
7875 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7876 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7877 //        Offset  Count  Stride
7878 //    D0    0       1       4    (int)  <- dummy dimension
7879 //    D1    0       2       8    (2 * (1) * 4)
7880 //    D2    1       2      20    (1 * (1 * 5) * 4)
7881 //    D3    0       2     200    (2 * (1 * 5 * 5) * 4)
7882 const Expr *StrideExpr = OASE->getStride();
7883 llvm::Value *Stride =
7884 StrideExpr
7885 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7886 CGF.Int64Ty, /*isSigned=*/false)
7887 : nullptr;
7888 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7889 if (Stride)
7890 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7891 else
7892 CurStrides.push_back(DimProd);
7893 if (DI != DimSizes.end())
7894 ++DI;
7897 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7898 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7899 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
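  // E.g., for "int arr[8]" and "#pragma omp target update to(arr[0:2:2])",
  // this records one offsets/counts/strides triple per dimension: the dummy
  // dimension (0, 1, sizeof(int)) followed by (0, 2, 2 * sizeof(int)).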
7902 /// Return the adjusted map modifiers if the declaration a capture refers to
7903 /// appears in a first-private clause. This is expected to be used only with
7904 /// directives that start with 'target'.
7905 OpenMPOffloadMappingFlags
7906 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7907 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7909 // A firstprivate variable captured by reference will use only the
7910 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7911 // declaration is known as first-private in this handler.
7912 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7913 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7914 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7915 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7916 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7917 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7919 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7920 if (I != LambdasMap.end())
7921 // For map(to: lambda), use the user-specified map type.
7922 return getMapTypeBits(
7923 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7924 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7925 /*AddPtrFlag=*/false,
7926 /*AddIsTargetParamFlag=*/false,
7927 /*isNonContiguous=*/false);
7928 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7929 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
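  // E.g., for "#pragma omp target firstprivate(p)" where p is "float *",
  // the capture is mapped with OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ; a
  // non-pointer firstprivate gets OMP_MAP_PRIVATE | OMP_MAP_TO instead.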
7932 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7933 // Shift the position left by getFlagMemberOffset() bits.
7934 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7935 << getFlagMemberOffset());
7938 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7939 OpenMPOffloadMappingFlags MemberOfFlag) {
7940 // If the entry is PTR_AND_OBJ but has not been marked with the special
7941 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7942 // marked as MEMBER_OF.
7943 if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7944 Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
7945 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7946 (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
7947 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
7948 return;
7950 // Reset the placeholder value to prepare the flag for the assignment of the
7951 // proper MEMBER_OF value.
7952 Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7953 Flags |= MemberOfFlag;
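  // E.g., getMemberOfFlag(/*Position=*/3) encodes MEMBER_OF(4) as the value
  // (3 + 1) << getFlagMemberOffset(); setCorrectMemberOfFlag then swaps the
  // 0xFFFF placeholder for that value on eligible PTR_AND_OBJ entries.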
7956 void getPlainLayout(const CXXRecordDecl *RD,
7957 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7958 bool AsBase) const {
7959 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7961 llvm::StructType *St =
7962 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7964 unsigned NumElements = St->getNumElements();
7965 llvm::SmallVector<
7966 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7967 RecordLayout(NumElements);
7969 // Fill bases.
7970 for (const auto &I : RD->bases()) {
7971 if (I.isVirtual())
7972 continue;
7973 const auto *Base = I.getType()->getAsCXXRecordDecl();
7974 // Ignore empty bases.
7975 if (Base->isEmpty() || CGF.getContext()
7976 .getASTRecordLayout(Base)
7977 .getNonVirtualSize()
7978 .isZero())
7979 continue;
7981 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7982 RecordLayout[FieldIndex] = Base;
7984 // Fill in virtual bases.
7985 for (const auto &I : RD->vbases()) {
7986 const auto *Base = I.getType()->getAsCXXRecordDecl();
7987 // Ignore empty bases.
7988 if (Base->isEmpty())
7989 continue;
7990 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7991 if (RecordLayout[FieldIndex])
7992 continue;
7993 RecordLayout[FieldIndex] = Base;
7995 // Fill in all the fields.
7996 assert(!RD->isUnion() && "Unexpected union.");
7997 for (const auto *Field : RD->fields()) {
7998 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7999 // will fill in later.)
8000 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8001 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8002 RecordLayout[FieldIndex] = Field;
8005 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8006 &Data : RecordLayout) {
8007 if (Data.isNull())
8008 continue;
8009 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8010 getPlainLayout(Base, Layout, /*AsBase=*/true);
8011 else
8012 Layout.push_back(Data.get<const FieldDecl *>());
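  // E.g., a sketch: for "struct B { int x; }; struct D : B { int y; };",
  // the plain layout of D is [B::x, D::y]; non-virtual bases are
  // recursively flattened in front of the derived class's own fields.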
8016 /// Generate all the base pointers, section pointers, sizes, map types, and
8017 /// mappers for the extracted mappable expressions (all included in \a
8018 /// CombinedInfo). Also, for each item that relates to a device pointer, a
8019 /// pair of the relevant declaration and index where it occurs is appended to
8020 /// the device pointers info array.
8021 void generateAllInfoForClauses(
8022 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8023 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8024 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8025 // We have to process the component lists that relate to the same
8026 // declaration in a single chunk so that we can generate the map flags
8027 // correctly. Therefore, we organize all lists in a map.
8028 enum MapKind { Present, Allocs, Other, Total };
8029 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8030 SmallVector<SmallVector<MapInfo, 8>, 4>>
8031 Info;
8033 // Helper function to fill the information map for the different supported
8034 // clauses.
8035 auto &&InfoGen =
8036 [&Info, &SkipVarSet](
8037 const ValueDecl *D, MapKind Kind,
8038 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8039 OpenMPMapClauseKind MapType,
8040 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8041 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8042 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8043 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8044 if (SkipVarSet.contains(D))
8045 return;
8046 auto It = Info.find(D);
8047 if (It == Info.end())
8048 It = Info
8049 .insert(std::make_pair(
8050 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8051 .first;
8052 It->second[Kind].emplace_back(
8053 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8054 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8057 for (const auto *Cl : Clauses) {
8058 const auto *C = dyn_cast<OMPMapClause>(Cl);
8059 if (!C)
8060 continue;
8061 MapKind Kind = Other;
8062 if (llvm::is_contained(C->getMapTypeModifiers(),
8063 OMPC_MAP_MODIFIER_present))
8064 Kind = Present;
8065 else if (C->getMapType() == OMPC_MAP_alloc)
8066 Kind = Allocs;
8067 const auto *EI = C->getVarRefs().begin();
8068 for (const auto L : C->component_lists()) {
8069 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8070 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8071 C->getMapTypeModifiers(), std::nullopt,
8072 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8074 ++EI;
8077 for (const auto *Cl : Clauses) {
8078 const auto *C = dyn_cast<OMPToClause>(Cl);
8079 if (!C)
8080 continue;
8081 MapKind Kind = Other;
8082 if (llvm::is_contained(C->getMotionModifiers(),
8083 OMPC_MOTION_MODIFIER_present))
8084 Kind = Present;
8085 const auto *EI = C->getVarRefs().begin();
8086 for (const auto L : C->component_lists()) {
8087 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
8088 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8089 C->isImplicit(), std::get<2>(L), *EI);
8090 ++EI;
8093 for (const auto *Cl : Clauses) {
8094 const auto *C = dyn_cast<OMPFromClause>(Cl);
8095 if (!C)
8096 continue;
8097 MapKind Kind = Other;
8098 if (llvm::is_contained(C->getMotionModifiers(),
8099 OMPC_MOTION_MODIFIER_present))
8100 Kind = Present;
8101 const auto *EI = C->getVarRefs().begin();
8102 for (const auto L : C->component_lists()) {
8103 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
8104 std::nullopt, C->getMotionModifiers(),
8105 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8106 *EI);
8107 ++EI;
8111 // Look at the use_device_ptr and use_device_addr clauses information and
8112 // mark the existing map entries as such. If there is no map information for
8113 // an entry in the use_device_ptr and use_device_addr list, we create one
8114 // with map type 'alloc' and a zero size section. It is the user's fault if that
8115 // was not mapped before. If there is no map information and the pointer is
8116 // a struct member, then we defer the emission of that entry until the whole
8117 // struct has been processed.
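  // E.g., a sketch assuming a member list item is allowed here: for
  // "#pragma omp target data map(to: s) use_device_ptr(s.p)", the entry for
  // s.p is a struct member, so its emission is deferred until the enclosing
  // struct s has been fully processed.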
8118 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8119 SmallVector<DeferredDevicePtrEntryTy, 4>>
8120 DeferredInfo;
8121 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8123 auto &&UseDeviceDataCombinedInfoGen =
8124 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8125 CodeGenFunction &CGF) {
8126 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8127 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8128 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8129 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8130 UseDeviceDataCombinedInfo.Sizes.push_back(
8131 llvm::Constant::getNullValue(CGF.Int64Ty));
8132 UseDeviceDataCombinedInfo.Types.push_back(
8133 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8134 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8137 auto &&MapInfoGen =
8138 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8139 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8140 OMPClauseMappableExprCommon::MappableExprComponentListRef
8141 Components,
8142 bool IsImplicit, bool IsDevAddr) {
8143 // We didn't find any match in our map information; generate a
8144 // zero-size array section. If the pointer is a struct member, we defer
8145 // this action until the whole struct has been processed.
8146 if (isa<MemberExpr>(IE)) {
8147 // Insert the pointer into Info to be processed by
8148 // generateInfoForComponentList. Because it is a member pointer
8149 // without a pointee, no entry will be generated for it, therefore
8150 // we need to generate one after the whole struct has been
8151 // processed. Nonetheless, generateInfoForComponentList must be
8152 // called to take the pointer into account for the calculation of
8153 // the range of the partial struct.
8154 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
8155 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
8156 nullptr, nullptr, IsDevAddr);
8157 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8158 } else {
8159 llvm::Value *Ptr;
8160 if (IsDevAddr) {
8161 if (IE->isGLValue())
8162 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8163 else
8164 Ptr = CGF.EmitScalarExpr(IE);
8165 } else {
8166 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8168 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
8172 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8173 const Expr *IE, bool IsDevAddr) -> bool {
8174 // We potentially have map information for this declaration already.
8175 // Look for the first set of components that refer to it. If found,
8176 // return true.
8177 // If the first component is a member expression, we have to look into
8178 // 'this', which maps to null in the map of map information. Otherwise
8179 // look directly for the information.
8180 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8181 if (It != Info.end()) {
8182 bool Found = false;
8183 for (auto &Data : It->second) {
8184 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8185 return MI.Components.back().getAssociatedDeclaration() == VD;
8187 // If we found a map entry, signal that the pointer has to be
8188 // returned and move on to the next declaration. Exclude cases where
8189 // the base pointer is mapped as array subscript, array section or
8190 // array shaping. The base address is passed as a pointer to base in
8191 // this case and cannot be used as a base for a use_device_ptr list
8192 // item.
8193 if (CI != Data.end()) {
8194 if (IsDevAddr) {
8195 CI->ReturnDevicePointer = true;
8196 Found = true;
8197 break;
8198 } else {
8199 auto PrevCI = std::next(CI->Components.rbegin());
8200 const auto *VarD = dyn_cast<VarDecl>(VD);
8201 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8202 isa<MemberExpr>(IE) ||
8203 !VD->getType().getNonReferenceType()->isPointerType() ||
8204 PrevCI == CI->Components.rend() ||
8205 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8206 VarD->hasLocalStorage()) {
8207 CI->ReturnDevicePointer = true;
8208 Found = true;
8209 break;
8214 return Found;
8216 return false;
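    // E.g., for a function-local "int *p" and
    // "#pragma omp target data map(p[0:10]) use_device_ptr(p)", the existing
    // map entry for p is found and marked ReturnDevicePointer instead of
    // emitting a separate zero-size entry.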
8219 // Look at the use_device_ptr clause information and mark the existing map
8220 // entries as such. If there is no map information for an entry in the
8221 // use_device_ptr list, we create one with map type 'alloc' and a zero size
8222 // section. It is the user's fault if that was not mapped before. If there is
8223 // no map information and the pointer is a struct member, then we defer the
8224 // emission of that entry until the whole struct has been processed.
8225 for (const auto *Cl : Clauses) {
8226 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8227 if (!C)
8228 continue;
8229 for (const auto L : C->component_lists()) {
8230 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8231 std::get<1>(L);
8232 assert(!Components.empty() &&
8233 "Not expecting empty list of components!");
8234 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8235 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8236 const Expr *IE = Components.back().getAssociatedExpression();
8237 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8238 continue;
8239 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8240 /*IsDevAddr=*/false);
8244 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8245 for (const auto *Cl : Clauses) {
8246 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8247 if (!C)
8248 continue;
8249 for (const auto L : C->component_lists()) {
8250 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8251 std::get<1>(L);
8252 assert(!std::get<1>(L).empty() &&
8253 "Not expecting empty list of components!");
8254 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8255 if (!Processed.insert(VD).second)
8256 continue;
8257 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8258 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8259 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8260 continue;
8261 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8262 /*IsDevAddr=*/true);
8266 for (const auto &Data : Info) {
8267 StructRangeInfoTy PartialStruct;
8268 // Temporary generated information.
8269 MapCombinedInfoTy CurInfo;
8270 const Decl *D = Data.first;
8271 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8272 for (const auto &M : Data.second) {
8273 for (const MapInfo &L : M) {
8274 assert(!L.Components.empty() &&
8275 "Not expecting declaration with no component lists.");
8277 // Remember the current base pointer index.
8278 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8279 CurInfo.NonContigInfo.IsNonContiguous =
8280 L.Components.back().isNonContiguous();
8281 generateInfoForComponentList(
8282 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8283 CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8284 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8286 // If this entry relates to a device pointer, set the relevant
8287 // declaration and add the 'return pointer' flag.
8288 if (L.ReturnDevicePointer) {
8289 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8290 "Unexpected number of mapped base pointers.");
8292 const ValueDecl *RelevantVD =
8293 L.Components.back().getAssociatedDeclaration();
8294 assert(RelevantVD &&
8295 "No relevant declaration related with device pointer??");
8297 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8298 CurInfo.Types[CurrentBasePointersIdx] |=
8299 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8304 // Append any pending zero-length pointers which are struct members and
8305 // used with use_device_ptr or use_device_addr.
8306 auto CI = DeferredInfo.find(Data.first);
8307 if (CI != DeferredInfo.end()) {
8308 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8309 llvm::Value *BasePtr;
8310 llvm::Value *Ptr;
8311 if (L.ForDeviceAddr) {
8312 if (L.IE->isGLValue())
8313 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8314 else
8315 Ptr = this->CGF.EmitScalarExpr(L.IE);
8316 BasePtr = Ptr;
8317 // Entry is RETURN_PARAM. Also, set the placeholder value
8318 // MEMBER_OF=FFFF so that the entry is later updated with the
8319 // correct value of MEMBER_OF.
8320 CurInfo.Types.push_back(
8321 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8322 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8323 } else {
8324 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8325 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8326 L.IE->getExprLoc());
8327 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8328 // placeholder value MEMBER_OF=FFFF so that the entry is later
8329 // updated with the correct value of MEMBER_OF.
8330 CurInfo.Types.push_back(
8331 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8332 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8333 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8335 CurInfo.Exprs.push_back(L.VD);
8336 CurInfo.BasePointers.emplace_back(BasePtr);
8337 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8338 CurInfo.Pointers.push_back(Ptr);
8339 CurInfo.Sizes.push_back(
8340 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8341 CurInfo.Mappers.push_back(nullptr);
8344 // If there is an entry in PartialStruct it means we have a struct with
8345 // individual members mapped. Emit an extra combined entry.
8346 if (PartialStruct.Base.isValid()) {
8347 CurInfo.NonContigInfo.Dims.push_back(0);
8348 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
8349 /*IsMapThis*/ !VD, VD);
8352 // We need to append the results of this capture to what we already
8353 // have.
8354 CombinedInfo.append(CurInfo);
8356 // Append data for use_device_ptr clauses.
8357 CombinedInfo.append(UseDeviceDataCombinedInfo);
8360 public:
8361 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8362 : CurDir(&Dir), CGF(CGF) {
8363 // Extract firstprivate clause information.
8364 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8365 for (const auto *D : C->varlists())
8366 FirstPrivateDecls.try_emplace(
8367 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8368 // Extract implicit firstprivates from uses_allocators clauses.
8369 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8370 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8371 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8372 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8373 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8374 /*Implicit=*/true);
8375 else if (const auto *VD = dyn_cast<VarDecl>(
8376 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8377 ->getDecl()))
8378 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8381 // Extract device pointer clause information.
8382 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8383 for (auto L : C->component_lists())
8384 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8385 // Extract device addr clause information.
8386 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8387 for (auto L : C->component_lists())
8388 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8389 // Extract map information.
8390 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8391 if (C->getMapType() != OMPC_MAP_to)
8392 continue;
8393 for (auto L : C->component_lists()) {
8394 const ValueDecl *VD = std::get<0>(L);
8395 const auto *RD = VD ? VD->getType()
8396 .getCanonicalType()
8397 .getNonReferenceType()
8398 ->getAsCXXRecordDecl()
8399 : nullptr;
8400 if (RD && RD->isLambda())
8401 LambdasMap.try_emplace(std::get<0>(L), C);
8406 /// Constructor for the declare mapper directive.
8407 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8408 : CurDir(&Dir), CGF(CGF) {}
8410 /// Generate code for the combined entry if we have a partially mapped struct
8411 /// and take care of the mapping flags of the arguments corresponding to
8412 /// individual struct members.
8413 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8414 MapFlagsArrayTy &CurTypes,
8415 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8416 const ValueDecl *VD = nullptr,
8417 bool NotTargetParams = true) const {
8418 if (CurTypes.size() == 1 &&
8419 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8420 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8421 !PartialStruct.IsArraySection)
8422 return;
8423 Address LBAddr = PartialStruct.LowestElem.second;
8424 Address HBAddr = PartialStruct.HighestElem.second;
8425 if (PartialStruct.HasCompleteRecord) {
8426 LBAddr = PartialStruct.LB;
8427 HBAddr = PartialStruct.LB;
8429 CombinedInfo.Exprs.push_back(VD);
8430 // Base is the base of the struct
8431 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8432 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8433 // Pointer is the address of the lowest element
8434 llvm::Value *LB = LBAddr.getPointer();
8435 const CXXMethodDecl *MD =
8436 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8437 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8438 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8439 // There should not be a mapper for a combined entry.
8440 if (HasBaseClass) {
8441 // OpenMP 5.2 148:21:
8442 // If the target construct is within a class non-static member function,
8443 // and a variable is an accessible data member of the object for which the
8444 // non-static data member function is invoked, the variable is treated as
8445 // if the this[:1] expression had appeared in a map clause with a map-type
8446 // of tofrom.
8447 // Emit this[:1]
8448 CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8449 QualType Ty = MD->getThisType()->getPointeeType();
8450 llvm::Value *Size =
8451 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8452 /*isSigned=*/true);
8453 CombinedInfo.Sizes.push_back(Size);
8454 } else {
8455 CombinedInfo.Pointers.push_back(LB);
8456 // Size is (addr of {highest+1} element) - (addr of lowest element)
8457 llvm::Value *HB = HBAddr.getPointer();
8458 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8459 HBAddr.getElementType(), HB, /*Idx0=*/1);
8460 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8461 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8462 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8463 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8464 /*isSigned=*/false);
8465 CombinedInfo.Sizes.push_back(Size);
8466 }
8467 CombinedInfo.Mappers.push_back(nullptr);
8468 // Map type is always TARGET_PARAM when generating info for captures.
8469 CombinedInfo.Types.push_back(
8470 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8471 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8472 // If any element has the present modifier, then make sure the runtime
8473 // doesn't attempt to allocate the struct.
8474 if (CurTypes.end() !=
8475 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8476 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8477 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8478 }))
8479 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8480 // Remove TARGET_PARAM flag from the first element
8481 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8482 // If any element has the ompx_hold modifier, then make sure the runtime
8483 // uses the hold reference count for the struct as a whole so that it won't
8484 // be unmapped by an extra dynamic reference count decrement. Add it to all
8485 // elements as well so the runtime knows which reference count to check
8486 // when determining whether it's time for device-to-host transfers of
8487 // individual elements.
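// A sketch of the effect (hypothetical user code, assuming the LLVM
// ompx_hold extension):
//   #pragma omp target data map(ompx_hold, tofrom : S)
//   { /* inner constructs cannot unmap S via dynamic ref-count decrements */ }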
8488 if (CurTypes.end() !=
8489 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8490 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8491 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8492 })) {
8493 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8494 for (auto &M : CurTypes)
8495 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8498 // All other current entries will be MEMBER_OF the combined entry
8499 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8500 // 0xFFFF in the MEMBER_OF field).
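// Sketch of the encoding (see getMemberOfFlag): if the combined entry sits
// at index I of the generated argument arrays, the value I + 1 is placed in
// the high MEMBER_OF bits of each member's map-type flags.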
8501 OpenMPOffloadMappingFlags MemberOfFlag =
8502 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8503 for (auto &M : CurTypes)
8504 setCorrectMemberOfFlag(M, MemberOfFlag);
8507 /// Generate all the base pointers, section pointers, sizes, map types, and
8508 /// mappers for the extracted mappable expressions (all included in \a
8509 /// CombinedInfo). Also, for each item that relates to a device pointer, a
8510 /// pair of the relevant declaration and index where it occurs is appended to
8511 /// the device pointers info array.
8512 void generateAllInfo(
8513 MapCombinedInfoTy &CombinedInfo,
8514 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8515 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8516 assert(CurDir.is<const OMPExecutableDirective *>() &&
8517 "Expect a executable directive");
8518 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8519 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8522 /// Generate all the base pointers, section pointers, sizes, map types, and
8523 /// mappers for the extracted map clauses of a user-defined mapper (all included
8524 /// in \a CombinedInfo).
8525 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8526 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8527 "Expect a declare mapper directive");
8528 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8529 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8532 /// Emit capture info for lambdas for variables captured by reference.
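/// A sketch of the case handled here (hypothetical user code):
/// \code
/// int X = 0;
/// auto L = [&X]() { X += 1; };
/// #pragma omp target map(to : L)
/// L(); // the by-reference capture of X needs a PTR_AND_OBJ entry
/// \endcode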
8533 void generateInfoForLambdaCaptures(
8534 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8535 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8536 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8537 const auto *RD = VDType->getAsCXXRecordDecl();
8538 if (!RD || !RD->isLambda())
8539 return;
8540 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8541 CGF.getContext().getDeclAlign(VD));
8542 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8543 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8544 FieldDecl *ThisCapture = nullptr;
8545 RD->getCaptureFields(Captures, ThisCapture);
8546 if (ThisCapture) {
8547 LValue ThisLVal =
8548 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8549 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8550 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8551 VDLVal.getPointer(CGF));
8552 CombinedInfo.Exprs.push_back(VD);
8553 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8554 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8555 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8556 CombinedInfo.Sizes.push_back(
8557 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8558 CGF.Int64Ty, /*isSigned=*/true));
8559 CombinedInfo.Types.push_back(
8560 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8561 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8562 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8563 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8564 CombinedInfo.Mappers.push_back(nullptr);
8566 for (const LambdaCapture &LC : RD->captures()) {
8567 if (!LC.capturesVariable())
8568 continue;
8569 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8570 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8571 continue;
8572 auto It = Captures.find(VD);
8573 assert(It != Captures.end() && "Found lambda capture without field.");
8574 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8575 if (LC.getCaptureKind() == LCK_ByRef) {
8576 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8577 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8578 VDLVal.getPointer(CGF));
8579 CombinedInfo.Exprs.push_back(VD);
8580 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8581 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8582 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8583 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8584 CGF.getTypeSize(
8585 VD->getType().getCanonicalType().getNonReferenceType()),
8586 CGF.Int64Ty, /*isSigned=*/true));
8587 } else {
8588 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8589 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8590 VDLVal.getPointer(CGF));
8591 CombinedInfo.Exprs.push_back(VD);
8592 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8593 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8594 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8595 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8597 CombinedInfo.Types.push_back(
8598 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8599 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8600 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8601 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8602 CombinedInfo.Mappers.push_back(nullptr);
8606 /// Set correct indices for lambda captures.
8607 void adjustMemberOfForLambdaCaptures(
8608 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8609 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8610 MapFlagsArrayTy &Types) const {
8611 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8612 // Set correct member_of idx for all implicit lambda captures.
8613 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8614 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8615 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8616 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8617 continue;
8618 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8619 assert(BasePtr && "Unable to find base lambda address.");
8620 int TgtIdx = -1;
8621 for (unsigned J = I; J > 0; --J) {
8622 unsigned Idx = J - 1;
8623 if (Pointers[Idx] != BasePtr)
8624 continue;
8625 TgtIdx = Idx;
8626 break;
8628 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8629 // All other current entries will be MEMBER_OF the combined entry
8630 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8631 // 0xFFFF in the MEMBER_OF field).
8632 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8633 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8637 /// Generate the base pointers, section pointers, sizes, map types, and
8638 /// mappers associated to a given capture (all included in \a CombinedInfo).
8639 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8640 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8641 StructRangeInfoTy &PartialStruct) const {
8642 assert(!Cap->capturesVariableArrayType() &&
8643 "Not expecting to generate map info for a variable array type!");
8645 // We need to know when we are generating information for the first component.
8646 const ValueDecl *VD = Cap->capturesThis()
8647 ? nullptr
8648 : Cap->getCapturedVar()->getCanonicalDecl();
8650 // For map(to: lambda): skip it here; it is processed in
8651 // generateDefaultMapInfo.
8652 if (LambdasMap.count(VD))
8653 return;
8655 // If this declaration appears in an is_device_ptr clause, we just have to
8656 // pass the pointer by value. If it is a reference to a declaration, we just
8657 // pass its value.
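// A sketch (hypothetical user code, assuming omp_target_alloc):
//   int *P = (int *)omp_target_alloc(N * sizeof(int), Dev);
//   #pragma omp target is_device_ptr(P)
//   P[0] = 1; // P is forwarded by value; no mapping is generated for it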
8658 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8659 CombinedInfo.Exprs.push_back(VD);
8660 CombinedInfo.BasePointers.emplace_back(Arg);
8661 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8662 CombinedInfo.Pointers.push_back(Arg);
8663 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8664 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8665 /*isSigned=*/true));
8666 CombinedInfo.Types.push_back(
8667 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8668 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8669 CombinedInfo.Mappers.push_back(nullptr);
8670 return;
8673 using MapData =
8674 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8675 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8676 const ValueDecl *, const Expr *>;
8677 SmallVector<MapData, 4> DeclComponentLists;
8678 // For member fields listed in is_device_ptr, store them in
8679 // DeclComponentLists for generating component info.
8680 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8681 auto It = DevPointersMap.find(VD);
8682 if (It != DevPointersMap.end())
8683 for (const auto &MCL : It->second)
8684 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8685 /*IsImplicit=*/true, nullptr,
8686 nullptr);
8687 auto I = HasDevAddrsMap.find(VD);
8688 if (I != HasDevAddrsMap.end())
8689 for (const auto &MCL : I->second)
8690 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8691 /*IsImplicit=*/true, nullptr,
8692 nullptr);
8693 assert(CurDir.is<const OMPExecutableDirective *>() &&
8694 "Expect a executable directive");
8695 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8696 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8697 const auto *EI = C->getVarRefs().begin();
8698 for (const auto L : C->decl_component_lists(VD)) {
8699 const ValueDecl *VDecl, *Mapper;
8700 // The expression is not correct if the mapping is implicit.
8701 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8702 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8703 std::tie(VDecl, Components, Mapper) = L;
8704 assert(VDecl == VD && "We got information for the wrong declaration??");
8705 assert(!Components.empty() &&
8706 "Not expecting declaration with no component lists.");
8707 DeclComponentLists.emplace_back(Components, C->getMapType(),
8708 C->getMapTypeModifiers(),
8709 C->isImplicit(), Mapper, E);
8710 ++EI;
8713 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8714 const MapData &RHS) {
8715 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8716 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8717 bool HasPresent =
8718 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8719 bool HasAllocs = MapType == OMPC_MAP_alloc;
8720 MapModifiers = std::get<2>(RHS);
8721 MapType = std::get<1>(LHS);
8722 bool HasPresentR =
8723 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8724 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8725 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8726 });
8728 // Find overlapping elements (including the offset from the base element).
8729 llvm::SmallDenseMap<
8730 const MapData *,
8731 llvm::SmallVector<
8732 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8733 4>
8734 OverlappedData;
8735 size_t Count = 0;
8736 for (const MapData &L : DeclComponentLists) {
8737 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8738 OpenMPMapClauseKind MapType;
8739 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8740 bool IsImplicit;
8741 const ValueDecl *Mapper;
8742 const Expr *VarRef;
8743 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8744 L;
8745 ++Count;
8746 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8747 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8748 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8749 VarRef) = L1;
8750 auto CI = Components.rbegin();
8751 auto CE = Components.rend();
8752 auto SI = Components1.rbegin();
8753 auto SE = Components1.rend();
8754 for (; CI != CE && SI != SE; ++CI, ++SI) {
8755 if (CI->getAssociatedExpression()->getStmtClass() !=
8756 SI->getAssociatedExpression()->getStmtClass())
8757 break;
8758 // Are we dealing with different variables/fields?
8759 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8760 break;
8762 // We found an overlap if, for at least one component list, we reached
8763 // the head of the components list.
8764 if (CI == CE || SI == SE) {
8765 // Ignore it if it is the same component.
8766 if (CI == CE && SI == SE)
8767 continue;
8768 const auto It = (SI == SE) ? CI : SI;
8769 // If one component is a pointer and another one is a kind of
8770 // dereference of this pointer (array subscript, section, dereference,
8771 // etc.), it is not an overlap.
8772 // The same applies if one component is a base and another component is
8773 // a dereferenced pointer memberexpr with the same base.
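// For example (hypothetical): map(S.P) and map(S.P[0:N]) do not overlap,
// since the second list dereferences the pointer mapped by the first; in
// contrast, map(S) and map(S.X) do overlap.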
8774 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8775 (std::prev(It)->getAssociatedDeclaration() &&
8776 std::prev(It)
8777 ->getAssociatedDeclaration()
8778 ->getType()
8779 ->isPointerType()) ||
8780 (It->getAssociatedDeclaration() &&
8781 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8782 std::next(It) != CE && std::next(It) != SE))
8783 continue;
8784 const MapData &BaseData = CI == CE ? L : L1;
8785 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8786 SI == SE ? Components : Components1;
8787 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8788 OverlappedElements.getSecond().push_back(SubData);
8792 // Sort the overlapped elements for each item.
8793 llvm::SmallVector<const FieldDecl *, 4> Layout;
8794 if (!OverlappedData.empty()) {
8795 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8796 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8797 while (BaseType != OrigType) {
8798 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8799 OrigType = BaseType->getPointeeOrArrayElementType();
8802 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8803 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8804 else {
8805 const auto *RD = BaseType->getAsRecordDecl();
8806 Layout.append(RD->field_begin(), RD->field_end());
8809 for (auto &Pair : OverlappedData) {
8810 llvm::stable_sort(
8811 Pair.getSecond(),
8812 [&Layout](
8813 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8814 OMPClauseMappableExprCommon::MappableExprComponentListRef
8815 Second) {
8816 auto CI = First.rbegin();
8817 auto CE = First.rend();
8818 auto SI = Second.rbegin();
8819 auto SE = Second.rend();
8820 for (; CI != CE && SI != SE; ++CI, ++SI) {
8821 if (CI->getAssociatedExpression()->getStmtClass() !=
8822 SI->getAssociatedExpression()->getStmtClass())
8823 break;
8824 // Are we dealing with different variables/fields?
8825 if (CI->getAssociatedDeclaration() !=
8826 SI->getAssociatedDeclaration())
8827 break;
8830 // Lists contain the same elements.
8831 if (CI == CE && SI == SE)
8832 return false;
8834 // A list with fewer elements is less than a list with more elements.
8835 if (CI == CE || SI == SE)
8836 return CI == CE;
8838 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8839 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8840 if (FD1->getParent() == FD2->getParent())
8841 return FD1->getFieldIndex() < FD2->getFieldIndex();
8842 const auto *It =
8843 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8844 return FD == FD1 || FD == FD2;
8845 });
8846 return *It == FD1;
8850 // This is associated with a capture, because the mapping flags depend on
8851 // it. First, go through all of the elements that have overlapped elements.
8852 bool IsFirstComponentList = true;
8853 for (const auto &Pair : OverlappedData) {
8854 const MapData &L = *Pair.getFirst();
8855 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8856 OpenMPMapClauseKind MapType;
8857 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8858 bool IsImplicit;
8859 const ValueDecl *Mapper;
8860 const Expr *VarRef;
8861 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8862 L;
8863 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8864 OverlappedComponents = Pair.getSecond();
8865 generateInfoForComponentList(
8866 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8867 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8868 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8869 IsFirstComponentList = false;
8871 // Go through other elements without overlapped elements.
8872 for (const MapData &L : DeclComponentLists) {
8873 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8874 OpenMPMapClauseKind MapType;
8875 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8876 bool IsImplicit;
8877 const ValueDecl *Mapper;
8878 const Expr *VarRef;
8879 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8880 L;
8881 auto It = OverlappedData.find(&L);
8882 if (It == OverlappedData.end())
8883 generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
8884 Components, CombinedInfo, PartialStruct,
8885 IsFirstComponentList, IsImplicit, Mapper,
8886 /*ForDeviceAddr=*/false, VD, VarRef);
8887 IsFirstComponentList = false;
8891 /// Generate the default map information for a given capture \a CI,
8892 /// record field declaration \a RI and captured value \a CV.
8893 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8894 const FieldDecl &RI, llvm::Value *CV,
8895 MapCombinedInfoTy &CombinedInfo) const {
8896 bool IsImplicit = true;
8897 // Do the default mapping.
8898 if (CI.capturesThis()) {
8899 CombinedInfo.Exprs.push_back(nullptr);
8900 CombinedInfo.BasePointers.push_back(CV);
8901 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8902 CombinedInfo.Pointers.push_back(CV);
8903 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8904 CombinedInfo.Sizes.push_back(
8905 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8906 CGF.Int64Ty, /*isSigned=*/true));
8907 // Default map type.
8908 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8909 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8910 } else if (CI.capturesVariableByCopy()) {
8911 const VarDecl *VD = CI.getCapturedVar();
8912 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8913 CombinedInfo.BasePointers.push_back(CV);
8914 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8915 CombinedInfo.Pointers.push_back(CV);
8916 if (!RI.getType()->isAnyPointerType()) {
8917 // We have to signal to the runtime that captures passed by value are
8918 // not pointers.
8919 CombinedInfo.Types.push_back(
8920 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8921 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8922 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8923 } else {
8924 // Pointers are implicitly mapped with a zero size and no flags
8925 // (other than the first map, which is added for all implicit maps).
8926 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8927 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8929 auto I = FirstPrivateDecls.find(VD);
8930 if (I != FirstPrivateDecls.end())
8931 IsImplicit = I->getSecond();
8932 } else {
8933 assert(CI.capturesVariable() && "Expected captured reference.");
8934 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8935 QualType ElementType = PtrTy->getPointeeType();
8936 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8937 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8938 // The default map type for a scalar/complex type is 'to' because by
8939 // default the value doesn't have to be retrieved. For an aggregate
8940 // type, the default is 'tofrom'.
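// For instance (informal sketch): a captured 'double D' would default to
// 'to', while a captured 'struct S O' would default to 'tofrom'.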
8941 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8942 const VarDecl *VD = CI.getCapturedVar();
8943 auto I = FirstPrivateDecls.find(VD);
8944 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8945 CombinedInfo.BasePointers.push_back(CV);
8946 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8947 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8948 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8949 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8950 AlignmentSource::Decl));
8951 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8952 } else {
8953 CombinedInfo.Pointers.push_back(CV);
8955 if (I != FirstPrivateDecls.end())
8956 IsImplicit = I->getSecond();
8958 // Every default map produces a single argument which is a target parameter.
8959 CombinedInfo.Types.back() |=
8960 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8962 // Add flag stating this is an implicit map.
8963 if (IsImplicit)
8964 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8966 // No user-defined mapper for default mapping.
8967 CombinedInfo.Mappers.push_back(nullptr);
8970 } // anonymous namespace
8972 // Try to extract the base declaration from a `this->x` expression if possible.
8973 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8974 if (!E)
8975 return nullptr;
8977 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8978 if (const MemberExpr *ME =
8979 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8980 return ME->getMemberDecl();
8981 return nullptr;
8984 /// Emit a string constant containing the names of the values mapped to the
8985 /// offloading runtime library.
8986 llvm::Constant *
8987 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8988 MappableExprsHandler::MappingExprInfo &MapExprs) {
8990 uint32_t SrcLocStrSize;
8991 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8992 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8994 SourceLocation Loc;
8995 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8996 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8997 Loc = VD->getLocation();
8998 else
8999 Loc = MapExprs.getMapExpr()->getExprLoc();
9000 } else {
9001 Loc = MapExprs.getMapDecl()->getLocation();
9004 std::string ExprName;
9005 if (MapExprs.getMapExpr()) {
9006 PrintingPolicy P(CGF.getContext().getLangOpts());
9007 llvm::raw_string_ostream OS(ExprName);
9008 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9009 OS.flush();
9010 } else {
9011 ExprName = MapExprs.getMapDecl()->getNameAsString();
9014 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9015 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9016 PLoc.getLine(), PLoc.getColumn(),
9017 SrcLocStrSize);
9020 /// Emit the arrays used to pass the captures and map information to the
9021 /// offloading runtime library. If there is no map or capture information,
9022 /// return nullptr by reference.
9023 static void emitOffloadingArrays(
9024 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9025 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9026 bool IsNonContiguous = false) {
9027 CodeGenModule &CGM = CGF.CGM;
9028 ASTContext &Ctx = CGF.getContext();
9030 // Reset the array information.
9031 Info.clearArrayInfo();
9032 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9034 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9035 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9036 CGF.AllocaInsertPt->getIterator());
9037 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9038 CGF.Builder.GetInsertPoint());
9040 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9041 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9043 if (CGM.getCodeGenOpts().getDebugInfo() !=
9044 llvm::codegenoptions::NoDebugInfo) {
9045 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9046 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9047 fillInfoMap);
9050 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *BP, llvm::Value *BPVal) {
9051 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9052 Address BPAddr(BP, BPVal->getType(),
9053 Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9054 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9058 auto CustomMapperCB = [&](unsigned int I) {
9059 llvm::Value *MFunc = nullptr;
9060 if (CombinedInfo.Mappers[I]) {
9061 Info.HasMapper = true;
9062 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9063 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9065 return MFunc;
9067 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
9068 /*IsNonContiguous=*/true, DeviceAddrCB,
9069 CustomMapperCB);
9072 /// Check for inner distribute directive.
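/// E.g. (hypothetical user code), it returns the 'distribute' level of:
/// \code
/// #pragma omp target
/// #pragma omp teams distribute parallel for
/// for (int I = 0; I < N; ++I)
///   Body(I);
/// \endcode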
9073 static const OMPExecutableDirective *
9074 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9075 const auto *CS = D.getInnermostCapturedStmt();
9076 const auto *Body =
9077 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9078 const Stmt *ChildStmt =
9079 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9081 if (const auto *NestedDir =
9082 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9083 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9084 switch (D.getDirectiveKind()) {
9085 case OMPD_target:
9086 if (isOpenMPDistributeDirective(DKind))
9087 return NestedDir;
9088 if (DKind == OMPD_teams) {
9089 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9090 /*IgnoreCaptured=*/true);
9091 if (!Body)
9092 return nullptr;
9093 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9094 if (const auto *NND =
9095 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9096 DKind = NND->getDirectiveKind();
9097 if (isOpenMPDistributeDirective(DKind))
9098 return NND;
9101 return nullptr;
9102 case OMPD_target_teams:
9103 if (isOpenMPDistributeDirective(DKind))
9104 return NestedDir;
9105 return nullptr;
9106 case OMPD_target_parallel:
9107 case OMPD_target_simd:
9108 case OMPD_target_parallel_for:
9109 case OMPD_target_parallel_for_simd:
9110 return nullptr;
9111 case OMPD_target_teams_distribute:
9112 case OMPD_target_teams_distribute_simd:
9113 case OMPD_target_teams_distribute_parallel_for:
9114 case OMPD_target_teams_distribute_parallel_for_simd:
9115 case OMPD_parallel:
9116 case OMPD_for:
9117 case OMPD_parallel_for:
9118 case OMPD_parallel_master:
9119 case OMPD_parallel_sections:
9120 case OMPD_for_simd:
9121 case OMPD_parallel_for_simd:
9122 case OMPD_cancel:
9123 case OMPD_cancellation_point:
9124 case OMPD_ordered:
9125 case OMPD_threadprivate:
9126 case OMPD_allocate:
9127 case OMPD_task:
9128 case OMPD_simd:
9129 case OMPD_tile:
9130 case OMPD_unroll:
9131 case OMPD_sections:
9132 case OMPD_section:
9133 case OMPD_single:
9134 case OMPD_master:
9135 case OMPD_critical:
9136 case OMPD_taskyield:
9137 case OMPD_barrier:
9138 case OMPD_taskwait:
9139 case OMPD_taskgroup:
9140 case OMPD_atomic:
9141 case OMPD_flush:
9142 case OMPD_depobj:
9143 case OMPD_scan:
9144 case OMPD_teams:
9145 case OMPD_target_data:
9146 case OMPD_target_exit_data:
9147 case OMPD_target_enter_data:
9148 case OMPD_distribute:
9149 case OMPD_distribute_simd:
9150 case OMPD_distribute_parallel_for:
9151 case OMPD_distribute_parallel_for_simd:
9152 case OMPD_teams_distribute:
9153 case OMPD_teams_distribute_simd:
9154 case OMPD_teams_distribute_parallel_for:
9155 case OMPD_teams_distribute_parallel_for_simd:
9156 case OMPD_target_update:
9157 case OMPD_declare_simd:
9158 case OMPD_declare_variant:
9159 case OMPD_begin_declare_variant:
9160 case OMPD_end_declare_variant:
9161 case OMPD_declare_target:
9162 case OMPD_end_declare_target:
9163 case OMPD_declare_reduction:
9164 case OMPD_declare_mapper:
9165 case OMPD_taskloop:
9166 case OMPD_taskloop_simd:
9167 case OMPD_master_taskloop:
9168 case OMPD_master_taskloop_simd:
9169 case OMPD_parallel_master_taskloop:
9170 case OMPD_parallel_master_taskloop_simd:
9171 case OMPD_requires:
9172 case OMPD_metadirective:
9173 case OMPD_unknown:
9174 default:
9175 llvm_unreachable("Unexpected directive.");
9179 return nullptr;
9182 /// Emit the user-defined mapper function. The code generation follows the
9183 /// pattern in the example below.
9184 /// \code
9185 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9186 /// void *base, void *begin,
9187 /// int64_t size, int64_t type,
9188 /// void *name = nullptr) {
9189 /// // Allocate space for an array section first or add a base/begin for
9190 /// // pointer dereference.
9191 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9192 /// !maptype.IsDelete)
9193 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9194 /// size*sizeof(Ty), clearToFromMember(type));
9195 /// // Map members.
9196 /// for (unsigned i = 0; i < size; i++) {
9197 /// // For each component specified by this mapper:
9198 /// for (auto c : begin[i]->all_components) {
9199 /// if (c.hasMapper())
9200 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9201 /// c.arg_type, c.arg_name);
9202 /// else
9203 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9204 /// c.arg_begin, c.arg_size, c.arg_type,
9205 /// c.arg_name);
9206 /// }
9207 /// }
9208 /// // Delete the array section.
9209 /// if (size > 1 && maptype.IsDelete)
9210 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9211 /// size*sizeof(Ty), clearToFromMember(type));
9212 /// }
9213 /// \endcode
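/// As a reference point (hypothetical user code), such a function would be
/// emitted for a mapper like:
/// \code
/// struct Vec { int Len; double *Data; };
/// #pragma omp declare mapper(Vec V) map(V, V.Data[0 : V.Len])
/// \endcode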
9214 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9215 CodeGenFunction *CGF) {
9216 if (UDMMap.count(D) > 0)
9217 return;
9218 ASTContext &C = CGM.getContext();
9219 QualType Ty = D->getType();
9220 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9221 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9222 auto *MapperVarDecl =
9223 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9224 SourceLocation Loc = D->getLocation();
9225 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9226 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9228 // Prepare mapper function arguments and attributes.
9229 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9230 C.VoidPtrTy, ImplicitParamDecl::Other);
9231 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9232 ImplicitParamDecl::Other);
9233 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9234 C.VoidPtrTy, ImplicitParamDecl::Other);
9235 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9236 ImplicitParamDecl::Other);
9237 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9238 ImplicitParamDecl::Other);
9239 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9240 ImplicitParamDecl::Other);
9241 FunctionArgList Args;
9242 Args.push_back(&HandleArg);
9243 Args.push_back(&BaseArg);
9244 Args.push_back(&BeginArg);
9245 Args.push_back(&SizeArg);
9246 Args.push_back(&TypeArg);
9247 Args.push_back(&NameArg);
9248 const CGFunctionInfo &FnInfo =
9249 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9250 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9251 SmallString<64> TyStr;
9252 llvm::raw_svector_ostream Out(TyStr);
9253 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9254 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9255 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9256 Name, &CGM.getModule());
9257 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9258 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9259 // Start the mapper function code generation.
9260 CodeGenFunction MapperCGF(CGM);
9261 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9262 // Compute the starting and end addresses of array elements.
9263 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9264 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9265 C.getPointerType(Int64Ty), Loc);
9266 // Prepare common arguments for array initialization and deletion.
9267 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9268 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9269 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9270 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9271 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9272 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9273 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9274 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9275 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9276 // Convert the size in bytes into the number of array elements.
9277 Size = MapperCGF.Builder.CreateExactUDiv(
9278 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9279 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9280 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9281 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9282 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9283 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9284 C.getPointerType(Int64Ty), Loc);
9285 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9286 MapperCGF.GetAddrOfLocalVar(&NameArg),
9287 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9289 // Emit array initialization if this is an array section and \p MapType indicates
9290 // that memory allocation is required.
9291 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9292 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9293 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9295 // Emit a for loop to iterate through SizeArg elements and map all of them.
9297 // Emit the loop header block.
9298 MapperCGF.EmitBlock(HeadBB);
9299 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9300 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9301 // Evaluate whether the initial condition is satisfied.
9302 llvm::Value *IsEmpty =
9303 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9304 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9305 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9307 // Emit the loop body block.
9308 MapperCGF.EmitBlock(BodyBB);
9309 llvm::BasicBlock *LastBB = BodyBB;
9310 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9311 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9312 PtrPHI->addIncoming(PtrBegin, EntryBB);
9313 Address PtrCurrent(PtrPHI, ElemTy,
9314 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9315 .getAlignment()
9316 .alignmentOfArrayElement(ElementSize));
9317 // Privatize the declared variable of mapper to be the current array element.
9318 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9319 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9320 (void)Scope.Privatize();
9322 // Get map clause information. Fill up the arrays with all mapped variables.
9323 MappableExprsHandler::MapCombinedInfoTy Info;
9324 MappableExprsHandler MEHandler(*D, MapperCGF);
9325 MEHandler.generateAllInfoForMapper(Info);
9327 // Call the runtime API __tgt_mapper_num_components to get the number of
9328 // pre-existing components.
9329 llvm::Value *OffloadingArgs[] = {Handle};
9330 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9331 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9332 OMPRTL___tgt_mapper_num_components),
9333 OffloadingArgs);
9334 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9335 PreviousSize,
9336 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9338 // Fill up the runtime mapper handle for all components.
9339 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9340 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9341 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9342 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9343 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9344 llvm::Value *CurSizeArg = Info.Sizes[I];
9345 llvm::Value *CurNameArg =
9346 (CGM.getCodeGenOpts().getDebugInfo() ==
9347 llvm::codegenoptions::NoDebugInfo)
9348 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9349 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9351 // Extract the MEMBER_OF field from the map type.
9352 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9353 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9354 Info.Types[I]));
9355 llvm::Value *MemberMapType =
9356 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9358 // Combine the map type inherited from user-defined mapper with that
9359 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9360 // bits of the \a MapType, which is the input argument of the mapper
9361 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9362 // bits of MemberMapType.
9363 // [OpenMP 5.0], 1.2.6. map-type decay.
9364 //        | alloc |  to   | from  | tofrom | release | delete
9365 // ----------------------------------------------------------
9366 // alloc  | alloc | alloc | alloc | alloc  | release | delete
9367 // to     | alloc | to    | alloc | to     | release | delete
9368 // from   | alloc | alloc | from  | from   | release | delete
9369 // tofrom | alloc | to    | from  | tofrom | release | delete
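// For example: a member declared map(to) in the mapper, combined with a
// program-side map(from), decays to alloc per the table above.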
9370 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9371 MapType,
9372 MapperCGF.Builder.getInt64(
9373 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9374 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9375 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9376 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9377 llvm::BasicBlock *AllocElseBB =
9378 MapperCGF.createBasicBlock("omp.type.alloc.else");
9379 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9380 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9381 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9382 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9383 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9384 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9385 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9386 MapperCGF.EmitBlock(AllocBB);
9387 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9388 MemberMapType,
9389 MapperCGF.Builder.getInt64(
9390 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9391 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9392 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9393 MapperCGF.Builder.CreateBr(EndBB);
9394 MapperCGF.EmitBlock(AllocElseBB);
9395 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9396 LeftToFrom,
9397 MapperCGF.Builder.getInt64(
9398 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9399 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9400 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9401 // In case of to, clear OMP_MAP_FROM.
9402 MapperCGF.EmitBlock(ToBB);
9403 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9404 MemberMapType,
9405 MapperCGF.Builder.getInt64(
9406 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9407 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9408 MapperCGF.Builder.CreateBr(EndBB);
9409 MapperCGF.EmitBlock(ToElseBB);
9410 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9411 LeftToFrom,
9412 MapperCGF.Builder.getInt64(
9413 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9414 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9415 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9416 // In case of from, clear OMP_MAP_TO.
9417 MapperCGF.EmitBlock(FromBB);
9418 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9419 MemberMapType,
9420 MapperCGF.Builder.getInt64(
9421 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9422 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9423 // In case of tofrom, do nothing.
9424 MapperCGF.EmitBlock(EndBB);
9425 LastBB = EndBB;
9426 llvm::PHINode *CurMapType =
9427 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9428 CurMapType->addIncoming(AllocMapType, AllocBB);
9429 CurMapType->addIncoming(ToMapType, ToBB);
9430 CurMapType->addIncoming(FromMapType, FromBB);
9431 CurMapType->addIncoming(MemberMapType, ToElseBB);
9433 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9434 CurSizeArg, CurMapType, CurNameArg};
9435 if (Info.Mappers[I]) {
9436 // Call the corresponding mapper function.
9437 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9438 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9439 assert(MapperFunc && "Expect a valid mapper function to be available.");
9440 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9441 } else {
9442 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9443 // data structure.
9444 MapperCGF.EmitRuntimeCall(
9445 OMPBuilder.getOrCreateRuntimeFunction(
9446 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9447 OffloadingArgs);
9451 // Update the pointer to point to the next element that needs to be mapped,
9452 // and check whether we have mapped all elements.
9453 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9454 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9455 PtrPHI->addIncoming(PtrNext, LastBB);
9456 llvm::Value *IsDone =
9457 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9458 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9459 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9461 MapperCGF.EmitBlock(ExitBB);
9462 // Emit array deletion if this is an array section and \p MapType indicates
9463 // that deletion is required.
9464 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9465 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9467 // Emit the function exit block.
9468 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9469 MapperCGF.FinishFunction();
9470 UDMMap.try_emplace(D, Fn);
9471 if (CGF) {
9472 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9473 Decls.second.push_back(D);
9477 /// Emit the array initialization or deletion portion for user-defined mapper
9478 /// code generation. First, it evaluates whether an array section is mapped and
9479 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9480 /// true and \a MapType does not indicate deletion of this array, array
9481 /// initialization code is generated. If \a IsInit is false and \a MapType
9482 /// indicates deletion of this array, array deletion code is generated.
9483 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9484 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9485 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9486 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9487 bool IsInit) {
9488 StringRef Prefix = IsInit ? ".init" : ".del";
9490 // Evaluate if this is an array section.
9491 llvm::BasicBlock *BodyBB =
9492 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9493 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9494 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9495 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9496 MapType,
9497 MapperCGF.Builder.getInt64(
9498 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9499 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9500 llvm::Value *DeleteCond;
9501 llvm::Value *Cond;
9502 if (IsInit) {
9503 // base != begin?
9504 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9505 // IsPtrAndObj?
9506 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9507 MapType,
9508 MapperCGF.Builder.getInt64(
9509 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9510 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9511 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9512 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9513 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9514 DeleteCond = MapperCGF.Builder.CreateIsNull(
9515 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9516 } else {
9517 Cond = IsArray;
9518 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9519 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9521 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9522 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9524 MapperCGF.EmitBlock(BodyBB);
9525 // Get the array size by multiplying element size and element number (i.e., \p
9526 // Size).
9527 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9528 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9529 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it performs
9530 // memory allocation/deletion only.
9531 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9532 MapType,
9533 MapperCGF.Builder.getInt64(
9534 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9535 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9536 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9537 MapTypeArg = MapperCGF.Builder.CreateOr(
9538 MapTypeArg,
9539 MapperCGF.Builder.getInt64(
9540 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9541 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9543 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9544 // data structure.
9545 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9546 ArraySize, MapTypeArg, MapName};
9547 MapperCGF.EmitRuntimeCall(
9548 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9549 OMPRTL___tgt_push_mapper_component),
9550 OffloadingArgs);
9553 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9554 const OMPDeclareMapperDecl *D) {
9555 auto I = UDMMap.find(D);
9556 if (I != UDMMap.end())
9557 return I->second;
9558 emitUserDefinedMapper(D);
9559 return UDMMap.lookup(D);
9562 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9563 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9564 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9565 const OMPLoopDirective &D)>
9566 SizeEmitter) {
9567 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9568 const OMPExecutableDirective *TD = &D;
9569 // Get nested teams distribute kind directive, if any.
9570 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9571 TD = getNestedDistributeDirective(CGM.getContext(), D);
9572 if (!TD)
9573 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9575 const auto *LD = cast<OMPLoopDirective>(TD);
9576 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9577 return NumIterations;
9578 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9581 void CGOpenMPRuntime::emitTargetCall(
9582 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9583 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9584 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9585 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9586 const OMPLoopDirective &D)>
9587 SizeEmitter) {
9588 if (!CGF.HaveInsertPoint())
9589 return;
9591 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
9592 CGM.getLangOpts().OpenMPOffloadMandatory;
9594 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9596 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9597 D.hasClausesOfKind<OMPNowaitClause>() ||
9598 D.hasClausesOfKind<OMPInReductionClause>();
9599 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9600 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9601 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9602 PrePostActionTy &) {
9603 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9605 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9607 CodeGenFunction::OMPTargetDataInfo InputInfo;
9608 llvm::Value *MapTypesArray = nullptr;
9609 llvm::Value *MapNamesArray = nullptr;
9610 // Generate code for the host fallback function.
9611 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
9612 &CS, OffloadingMandatory](CodeGenFunction &CGF) {
9613 if (OffloadingMandatory) {
9614 CGF.Builder.CreateUnreachable();
9615 } else {
9616 if (RequiresOuterTask) {
9617 CapturedVars.clear();
9618 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9620 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9623 // Fill up the pointer arrays and transfer execution to the device.
9624 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
9625 &MapNamesArray, SizeEmitter,
9626 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
9627 if (Device.getInt() == OMPC_DEVICE_ancestor) {
9628 // Reverse offloading is not supported, so just execute on the host.
9629 FallbackGen(CGF);
9630 return;
9633 // On top of the arrays that were filled up, the target offloading call
9634 // takes as arguments the device id as well as the host pointer. The host
9635 // pointer is used by the runtime library to identify the current target
9636 // region, so it only has to be unique and not necessarily point to
9637 // anything. It could be the pointer to the outlined function that
9638 // implements the target region, but we aren't using that, so the
9639 // compiler doesn't need to keep it alive and can therefore inline the host
9640 // function if proven worthwhile during optimization.
9642 // From this point on, we need to have an ID of the target region defined.
9643 assert(OutlinedFnID && "Invalid outlined function ID!");
9644 (void)OutlinedFnID;
9646 // Emit device ID if any.
9647 llvm::Value *DeviceID;
9648 if (Device.getPointer()) {
9649 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9650 Device.getInt() == OMPC_DEVICE_device_num) &&
9651 "Expected device_num modifier.");
9652 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9653 DeviceID =
9654 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9655 } else {
9656 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9659 // Emit the number of elements in the offloading arrays.
9660 llvm::Value *PointerNum =
9661 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9663 // Return value of the runtime offloading call.
9664 llvm::Value *Return;
9666 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9667 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9669 // Source location for the ident struct
9670 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9672 // Get tripcount for the target loop-based directive.
9673 llvm::Value *NumIterations =
9674 emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9676 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9677 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9678 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9679 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9680 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9681 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9682 /*isSigned=*/false);
9685 llvm::Value *ZeroArray =
9686 llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3));
9688 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9689 llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait);
9691 llvm::Value *NumTeams3D =
9692 CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0});
9693 llvm::Value *NumThreads3D =
9694 CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0});
9696 // Arguments for the target kernel.
9697 SmallVector<llvm::Value *> KernelArgs{
9698 CGF.Builder.getInt32(/* Version */ 2),
9699 PointerNum,
9700 InputInfo.BasePointersArray.getPointer(),
9701 InputInfo.PointersArray.getPointer(),
9702 InputInfo.SizesArray.getPointer(),
9703 MapTypesArray,
9704 MapNamesArray,
9705 InputInfo.MappersArray.getPointer(),
9706 NumIterations,
9707 Flags,
9708 NumTeams3D,
9709 NumThreads3D,
9710 DynCGroupMem,
9713 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9714 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9716 // The target region is an outlined function launched by the runtime
9717 // via calls to __tgt_target_kernel().
9719 // Note that on the host and CPU targets, the runtime implementation of
9720 // these calls simply calls the outlined function without forking threads.
9721 // The outlined functions themselves have runtime calls to
9722 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9723 // the compiler in emitTeamsCall() and emitParallelCall().
9725 // In contrast, on the NVPTX target, the implementation of
9726 // __tgt_target_teams() launches a GPU kernel with the requested number
9727 // of teams and threads so no additional calls to the runtime are required.
9728 // Check the error code and execute the host version if required.
9729 CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel(
9730 CGF.Builder, AllocaIP, Return, RTLoc, DeviceID, NumTeams, NumThreads,
9731 OutlinedFnID, KernelArgs));
9733 llvm::BasicBlock *OffloadFailedBlock =
9734 CGF.createBasicBlock("omp_offload.failed");
9735 llvm::BasicBlock *OffloadContBlock =
9736 CGF.createBasicBlock("omp_offload.cont");
9737 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9738 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9740 CGF.EmitBlock(OffloadFailedBlock);
9741 FallbackGen(CGF);
9743 CGF.EmitBranch(OffloadContBlock);
9745 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9748 // Notify that the host version must be executed.
9749 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
9750 FallbackGen(CGF);
9753 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9754 &MapNamesArray, &CapturedVars, RequiresOuterTask,
9755 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9756 // Fill up the arrays with all the captured variables.
9757 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9759 // Get mappable expression information.
9760 MappableExprsHandler MEHandler(D, CGF);
9761 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9762 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9764 auto RI = CS.getCapturedRecordDecl()->field_begin();
9765 auto *CV = CapturedVars.begin();
9766 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9767 CE = CS.capture_end();
9768 CI != CE; ++CI, ++RI, ++CV) {
9769 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9770 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9772 // VLA sizes are passed to the outlined region by copy and do not have map
9773 // information associated.
9774 if (CI->capturesVariableArrayType()) {
9775 CurInfo.Exprs.push_back(nullptr);
9776 CurInfo.BasePointers.push_back(*CV);
9777 CurInfo.DevicePtrDecls.push_back(nullptr);
9778 CurInfo.Pointers.push_back(*CV);
9779 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9780 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9781 // Copy to the device as an argument. No need to retrieve it.
9782 CurInfo.Types.push_back(
9783 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9784 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9785 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9786 CurInfo.Mappers.push_back(nullptr);
9787 } else {
9788 // If the map clause provides any information, we use it; otherwise we
9789 // fall back to a default mapping.
9790 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9791 if (!CI->capturesThis())
9792 MappedVarSet.insert(CI->getCapturedVar());
9793 else
9794 MappedVarSet.insert(nullptr);
9795 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9796 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9797 // Generate correct mapping for variables captured by reference in
9798 // lambdas.
9799 if (CI->capturesVariable())
9800 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9801 CurInfo, LambdaPointers);
9803 // We expect at least one element of map information for this capture.
9804 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9805 "Non-existing map pointer for capture!");
9806 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9807 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9808 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9809 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9810 "Inconsistent map information sizes!");
9812 // If there is an entry in PartialStruct it means we have a struct with
9813 // individual members mapped. Emit an extra combined entry.
9814 if (PartialStruct.Base.isValid()) {
9815 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9816 MEHandler.emitCombinedEntry(
9817 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9818 nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty());
9821 // We need to append the results of this capture to what we already have.
9822 CombinedInfo.append(CurInfo);
9824 // Adjust MEMBER_OF flags for the lambdas captures.
9825 MEHandler.adjustMemberOfForLambdaCaptures(
9826 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
9827 CombinedInfo.Types);
9828 // Map any list items in a map clause that were not captured because they
9829 // were not referenced within the construct.
9830 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
9832 CGOpenMPRuntime::TargetDataInfo Info;
9833 // Fill up the arrays and create the arguments.
9834 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9835 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9836 llvm::codegenoptions::NoDebugInfo;
9837 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9838 EmitDebug,
9839 /*ForEndCall=*/false);
9841 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9842 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9843 CGF.VoidPtrTy, CGM.getPointerAlign());
9844 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
9845 CGM.getPointerAlign());
9846 InputInfo.SizesArray =
9847 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9848 InputInfo.MappersArray =
9849 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9850 MapTypesArray = Info.RTArgs.MapTypesArray;
9851 MapNamesArray = Info.RTArgs.MapNamesArray;
9852 if (RequiresOuterTask)
9853 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9854 else
9855 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9856 };
9858 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9859 CodeGenFunction &CGF, PrePostActionTy &) {
9860 if (RequiresOuterTask) {
9861 CodeGenFunction::OMPTargetDataInfo InputInfo;
9862 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9863 } else {
9864 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9865 }
9866 };
9868 // If we have a target function ID, it means that we need to support
9869 // offloading; otherwise, just execute on the host. We must execute on the
9870 // host regardless of the if clause's condition when, e.g., the user does
9871 // not specify any target triples.
9872 if (OutlinedFnID) {
9873 if (IfCond) {
9874 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9875 } else {
9876 RegionCodeGenTy ThenRCG(TargetThenGen);
9877 ThenRCG(CGF);
9878 }
9879 } else {
9880 RegionCodeGenTy ElseRCG(TargetElseGen);
9881 ElseRCG(CGF);
9882 }
9883 }
9885 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9886 StringRef ParentName) {
9887 if (!S)
9888 return;
9890 // Codegen OMP target directives that offload compute to the device.
9891 bool RequiresDeviceCodegen =
9892 isa<OMPExecutableDirective>(S) &&
9893 isOpenMPTargetExecutionDirective(
9894 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9896 if (RequiresDeviceCodegen) {
9897 const auto &E = *cast<OMPExecutableDirective>(S);
9899 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9900 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9902 // Is this a target region that should not be emitted as an entry point? If
9903 // so, just signal that we are done with this target region.
9904 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9905 return;
9907 switch (E.getDirectiveKind()) {
9908 case OMPD_target:
9909 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9910 cast<OMPTargetDirective>(E));
9911 break;
9912 case OMPD_target_parallel:
9913 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9914 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9915 break;
9916 case OMPD_target_teams:
9917 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9918 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9919 break;
9920 case OMPD_target_teams_distribute:
9921 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9922 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9923 break;
9924 case OMPD_target_teams_distribute_simd:
9925 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9926 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9927 break;
9928 case OMPD_target_parallel_for:
9929 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9930 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9931 break;
9932 case OMPD_target_parallel_for_simd:
9933 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9934 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9935 break;
9936 case OMPD_target_simd:
9937 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9938 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9939 break;
9940 case OMPD_target_teams_distribute_parallel_for:
9941 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9942 CGM, ParentName,
9943 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9944 break;
9945 case OMPD_target_teams_distribute_parallel_for_simd:
9946 CodeGenFunction::
9947 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9948 CGM, ParentName,
9949 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9950 break;
9951 case OMPD_parallel:
9952 case OMPD_for:
9953 case OMPD_parallel_for:
9954 case OMPD_parallel_master:
9955 case OMPD_parallel_sections:
9956 case OMPD_for_simd:
9957 case OMPD_parallel_for_simd:
9958 case OMPD_cancel:
9959 case OMPD_cancellation_point:
9960 case OMPD_ordered:
9961 case OMPD_threadprivate:
9962 case OMPD_allocate:
9963 case OMPD_task:
9964 case OMPD_simd:
9965 case OMPD_tile:
9966 case OMPD_unroll:
9967 case OMPD_sections:
9968 case OMPD_section:
9969 case OMPD_single:
9970 case OMPD_master:
9971 case OMPD_critical:
9972 case OMPD_taskyield:
9973 case OMPD_barrier:
9974 case OMPD_taskwait:
9975 case OMPD_taskgroup:
9976 case OMPD_atomic:
9977 case OMPD_flush:
9978 case OMPD_depobj:
9979 case OMPD_scan:
9980 case OMPD_teams:
9981 case OMPD_target_data:
9982 case OMPD_target_exit_data:
9983 case OMPD_target_enter_data:
9984 case OMPD_distribute:
9985 case OMPD_distribute_simd:
9986 case OMPD_distribute_parallel_for:
9987 case OMPD_distribute_parallel_for_simd:
9988 case OMPD_teams_distribute:
9989 case OMPD_teams_distribute_simd:
9990 case OMPD_teams_distribute_parallel_for:
9991 case OMPD_teams_distribute_parallel_for_simd:
9992 case OMPD_target_update:
9993 case OMPD_declare_simd:
9994 case OMPD_declare_variant:
9995 case OMPD_begin_declare_variant:
9996 case OMPD_end_declare_variant:
9997 case OMPD_declare_target:
9998 case OMPD_end_declare_target:
9999 case OMPD_declare_reduction:
10000 case OMPD_declare_mapper:
10001 case OMPD_taskloop:
10002 case OMPD_taskloop_simd:
10003 case OMPD_master_taskloop:
10004 case OMPD_master_taskloop_simd:
10005 case OMPD_parallel_master_taskloop:
10006 case OMPD_parallel_master_taskloop_simd:
10007 case OMPD_requires:
10008 case OMPD_metadirective:
10009 case OMPD_unknown:
10010 default:
10011 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10013 return;
10016 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10017 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10018 return;
10020 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10021 return;
10024 // If this is a lambda function, look into its body.
10025 if (const auto *L = dyn_cast<LambdaExpr>(S))
10026 S = L->getBody();
10028 // Keep looking for target regions recursively.
10029 for (const Stmt *II : S->children())
10030 scanForTargetRegionsFunctions(II, ParentName);
10033 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10034 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10035 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10036 if (!DevTy)
10037 return false;
10038 // Do not emit device_type(nohost) functions for the host.
10039 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10040 return true;
10041 // Do not emit device_type(host) functions for the device.
10042 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10043 return true;
10044 return false;
10045 }
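// Illustrative example: a function declared under
//   #pragma omp declare target device_type(nohost)
// makes this predicate return true during host compilation, so the host
// build skips emitting it.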
10047 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10048 // If emitting code for the host, we do not process FD here. Instead we do
10049 // the normal code generation.
10050 if (!CGM.getLangOpts().OpenMPIsDevice) {
10051 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10052 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10053 CGM.getLangOpts().OpenMPIsDevice))
10054 return true;
10055 return false;
10058 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10059 // Try to detect target regions in the function.
10060 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10061 StringRef Name = CGM.getMangledName(GD);
10062 scanForTargetRegionsFunctions(FD->getBody(), Name);
10063 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10064 CGM.getLangOpts().OpenMPIsDevice))
10065 return true;
10068 // Do not emit the function if it is not marked as declare target.
10069 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10070 AlreadyEmittedTargetDecls.count(VD) == 0;
10073 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10074 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10075 CGM.getLangOpts().OpenMPIsDevice))
10076 return true;
10078 if (!CGM.getLangOpts().OpenMPIsDevice)
10079 return false;
10081 // Check if there are Ctors/Dtors in this declaration and look for target
10082 // regions in it. We use the complete variant to produce the kernel name
10083 // mangling.
10084 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10085 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10086 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10087 StringRef ParentName =
10088 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10089 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10091 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10092 StringRef ParentName =
10093 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10094 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10098 // Do not emit the variable if it is not marked as declare target.
10099 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10100 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10101 cast<VarDecl>(GD.getDecl()));
10102 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10103 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10104 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10105 HasRequiresUnifiedSharedMemory)) {
10106 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10107 return true;
10109 return false;
10112 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10113 llvm::Constant *Addr) {
10114 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10115 !CGM.getLangOpts().OpenMPIsDevice)
10116 return;
10118 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10119 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10120 if (!Res) {
10121 if (CGM.getLangOpts().OpenMPIsDevice) {
10122 // Register non-target variables being emitted in device code (debug info
10123 // may cause this).
10124 StringRef VarName = CGM.getMangledName(VD);
10125 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10127 return;
10130 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10131 auto LinkageForVariable = [&VD, this]() {
10132 return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10135 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10136 OMPBuilder.registerTargetGlobalVariable(
10137 convertCaptureClause(VD), convertDeviceClause(VD),
10138 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10139 VD->isExternallyVisible(),
10140 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10141 VD->getCanonicalDecl()->getBeginLoc()),
10142 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10143 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10144 CGM.getTypes().ConvertTypeForMem(
10145 CGM.getContext().getPointerType(VD->getType())),
10146 Addr);
10148 for (auto *Ref : GeneratedRefs)
10149 CGM.addCompilerUsedGlobal(Ref);
10151 return;
10154 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10155 if (isa<FunctionDecl>(GD.getDecl()) ||
10156 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10157 return emitTargetFunctions(GD);
10159 return emitTargetGlobalVariable(GD);
10162 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10163 for (const VarDecl *VD : DeferredGlobalVariables) {
10164 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10165 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10166 if (!Res)
10167 continue;
10168 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10169 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10170 !HasRequiresUnifiedSharedMemory) {
10171 CGM.EmitGlobal(VD);
10172 } else {
10173 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10174 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10175 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10176 HasRequiresUnifiedSharedMemory)) &&
10177 "Expected link clause or to clause with unified memory.");
10178 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10183 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10184 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10185 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10186 " Expected target-based directive.");
10189 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10190 for (const OMPClause *Clause : D->clauselists()) {
10191 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10192 HasRequiresUnifiedSharedMemory = true;
10193 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10194 } else if (const auto *AC =
10195 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10196 switch (AC->getAtomicDefaultMemOrderKind()) {
10197 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10198 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10199 break;
10200 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10201 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10202 break;
10203 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10204 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10205 break;
10206 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10207 break;
10213 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10214 return RequiresAtomicOrdering;
10217 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10218 LangAS &AS) {
10219 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10220 return false;
10221 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10222 switch (A->getAllocatorType()) {
10223 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10224 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10225 // Not supported, fallback to the default mem space.
10226 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10227 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10228 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10229 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10230 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10231 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10232 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10233 AS = LangAS::Default;
10234 return true;
10235 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10236 llvm_unreachable("Expected predefined allocator for the variables with the "
10237 "static storage.");
10239 return false;
10242 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10243 return HasRequiresUnifiedSharedMemory;
10246 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10247 CodeGenModule &CGM)
10248 : CGM(CGM) {
10249 if (CGM.getLangOpts().OpenMPIsDevice) {
10250 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10251 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10255 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10256 if (CGM.getLangOpts().OpenMPIsDevice)
10257 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10260 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10261 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10262 return true;
10264 const auto *D = cast<FunctionDecl>(GD.getDecl());
10265 // Do not emit the function if it is marked as declare target, as it was
10266 // already emitted.
10267 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10268 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10269 if (auto *F = dyn_cast_or_null<llvm::Function>(
10270 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10271 return !F->isDeclaration();
10272 return false;
10274 return true;
10277 return !AlreadyEmittedTargetDecls.insert(D).second;
10280 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10281 // If we don't have entries or if we are emitting code for the device, we
10282 // don't need to do anything.
10283 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10284 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10285 (OMPBuilder.OffloadInfoManager.empty() &&
10286 !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
10287 return nullptr;
10289 // Create and register the function that handles the requires directives.
10290 ASTContext &C = CGM.getContext();
10292 llvm::Function *RequiresRegFn;
10294 CodeGenFunction CGF(CGM);
10295 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10296 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10297 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10298 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10299 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10300 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10301 // TODO: check for other requires clauses.
10302 // The requires directive takes effect only when a target region is
10303 // present in the compilation unit. Otherwise it is ignored and not
10304 // passed to the runtime. This prevents the runtime from raising an error
10305 // for mismatched requires clauses across compilation units that don't
10306 // contain at least one target region.
10307 assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
10308 !OMPBuilder.OffloadInfoManager.empty()) &&
10309 "Target or declare target region expected.");
10310 if (HasRequiresUnifiedSharedMemory)
10311 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10312 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10313 CGM.getModule(), OMPRTL___tgt_register_requires),
10314 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10315 CGF.FinishFunction();
10317 return RequiresRegFn;
10318 }
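// The registration function built above is roughly equivalent to the
// following (illustrative C; the actual emission is via IR):
//
//   void omp_offloading.requires_reg(void) {
//     __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY); // or OMP_REQ_NONE
//   }
//
// It is intended to run at program startup on the host so the runtime can
// check requires clauses for consistency across translation units.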
10320 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10321 const OMPExecutableDirective &D,
10322 SourceLocation Loc,
10323 llvm::Function *OutlinedFn,
10324 ArrayRef<llvm::Value *> CapturedVars) {
10325 if (!CGF.HaveInsertPoint())
10326 return;
10328 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10329 CodeGenFunction::RunCleanupsScope Scope(CGF);
10331 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10332 llvm::Value *Args[] = {
10333 RTLoc,
10334 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10335 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10336 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10337 RealArgs.append(std::begin(Args), std::end(Args));
10338 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10340 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10341 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10342 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10343 }
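// Illustrative example: '#pragma omp teams' with two captured values %a and
// %b lowers to roughly
//   call void @__kmpc_fork_teams(%ident, i32 2, @outlined.fn, %a, %b)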
10345 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10346 const Expr *NumTeams,
10347 const Expr *ThreadLimit,
10348 SourceLocation Loc) {
10349 if (!CGF.HaveInsertPoint())
10350 return;
10352 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10354 llvm::Value *NumTeamsVal =
10355 NumTeams
10356 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10357 CGF.CGM.Int32Ty, /* isSigned = */ true)
10358 : CGF.Builder.getInt32(0);
10360 llvm::Value *ThreadLimitVal =
10361 ThreadLimit
10362 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10363 CGF.CGM.Int32Ty, /* isSigned = */ true)
10364 : CGF.Builder.getInt32(0);
10366 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10367 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10368 ThreadLimitVal};
10369 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10370 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10371 PushNumTeamsArgs);
10372 }
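// Illustrative example: '#pragma omp teams num_teams(4) thread_limit(8)'
// lowers to roughly
//   call void @__kmpc_push_num_teams(%ident, %gtid, i32 4, i32 8)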
10374 void CGOpenMPRuntime::emitTargetDataCalls(
10375 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10376 const Expr *Device, const RegionCodeGenTy &CodeGen,
10377 CGOpenMPRuntime::TargetDataInfo &Info) {
10378 if (!CGF.HaveInsertPoint())
10379 return;
10381 // Action used to replace the default codegen action and turn privatization
10382 // off.
10383 PrePostActionTy NoPrivAction;
10385 // Generate the code for the opening of the data environment. Capture all the
10386 // arguments of the runtime call by reference because they are used in the
10387 // closing of the region.
10388 auto &&BeginThenGen = [this, &D, Device, &Info,
10389 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10390 // Fill up the arrays with all the mapped variables.
10391 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10393 // Get map clause information.
10394 MappableExprsHandler MEHandler(D, CGF);
10395 MEHandler.generateAllInfo(CombinedInfo);
10397 // Fill up the arrays and create the arguments.
10398 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10399 /*IsNonContiguous=*/true);
10401 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
10402 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10403 llvm::codegenoptions::NoDebugInfo;
10404 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
10405 EmitDebug);
10407 // Emit device ID if any.
10408 llvm::Value *DeviceID = nullptr;
10409 if (Device) {
10410 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10411 CGF.Int64Ty, /*isSigned=*/true);
10412 } else {
10413 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10416 // Emit the number of elements in the offloading arrays.
10417 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10419 // Source location for the ident struct
10420 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10422 llvm::Value *OffloadingArgs[] = {RTLoc,
10423 DeviceID,
10424 PointerNum,
10425 RTArgs.BasePointersArray,
10426 RTArgs.PointersArray,
10427 RTArgs.SizesArray,
10428 RTArgs.MapTypesArray,
10429 RTArgs.MapNamesArray,
10430 RTArgs.MappersArray};
10431 CGF.EmitRuntimeCall(
10432 OMPBuilder.getOrCreateRuntimeFunction(
10433 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10434 OffloadingArgs);
10436 // If device pointer privatization is required, emit the body of the region
10437 // here. It will have to be duplicated: with and without privatization.
10438 if (!Info.CaptureDeviceAddrMap.empty())
10439 CodeGen(CGF);
10442 // Generate code for the closing of the data region.
10443 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
10444 PrePostActionTy &) {
10445 assert(Info.isValid() && "Invalid data environment closing arguments.");
10447 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
10448 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10449 llvm::codegenoptions::NoDebugInfo;
10450 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
10451 EmitDebug,
10452 /*ForEndCall=*/true);
10454 // Emit device ID if any.
10455 llvm::Value *DeviceID = nullptr;
10456 if (Device) {
10457 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10458 CGF.Int64Ty, /*isSigned=*/true);
10459 } else {
10460 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10463 // Emit the number of elements in the offloading arrays.
10464 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10466 // Source location for the ident struct
10467 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10469 llvm::Value *OffloadingArgs[] = {RTLoc,
10470 DeviceID,
10471 PointerNum,
10472 RTArgs.BasePointersArray,
10473 RTArgs.PointersArray,
10474 RTArgs.SizesArray,
10475 RTArgs.MapTypesArray,
10476 RTArgs.MapNamesArray,
10477 RTArgs.MappersArray};
10478 CGF.EmitRuntimeCall(
10479 OMPBuilder.getOrCreateRuntimeFunction(
10480 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
10481 OffloadingArgs);
10484 // If we need device pointer privatization, we need to emit the body of the
10485 // region with no privatization in the 'else' branch of the conditional.
10486 // Otherwise, we don't have to do anything.
10487 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10488 PrePostActionTy &) {
10489 if (!Info.CaptureDeviceAddrMap.empty()) {
10490 CodeGen.setAction(NoPrivAction);
10491 CodeGen(CGF);
10495 // We don't have to do anything to close the region if the if clause evaluates
10496 // to false.
10497 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10499 if (IfCond) {
10500 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10501 } else {
10502 RegionCodeGenTy RCG(BeginThenGen);
10503 RCG(CGF);
10506 // If we don't require privatization of device pointers, we emit the body in
10507 // between the runtime calls. This avoids duplicating the body code.
10508 if (Info.CaptureDeviceAddrMap.empty()) {
10509 CodeGen.setAction(NoPrivAction);
10510 CodeGen(CGF);
10513 if (IfCond) {
10514 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10515 } else {
10516 RegionCodeGenTy RCG(EndThenGen);
10517 RCG(CGF);
10518 }
10519 }
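// Illustrative shape of the emitted code for
// '#pragma omp target data map(tofrom: a)' with no if clause and no device
// pointer privatization:
//   call void @__tgt_target_data_begin_mapper(...)
//   <region body>
//   call void @__tgt_target_data_end_mapper(...)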
10521 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10522 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10523 const Expr *Device) {
10524 if (!CGF.HaveInsertPoint())
10525 return;
10527 assert((isa<OMPTargetEnterDataDirective>(D) ||
10528 isa<OMPTargetExitDataDirective>(D) ||
10529 isa<OMPTargetUpdateDirective>(D)) &&
10530 "Expecting either target enter, exit data, or update directives.");
10532 CodeGenFunction::OMPTargetDataInfo InputInfo;
10533 llvm::Value *MapTypesArray = nullptr;
10534 llvm::Value *MapNamesArray = nullptr;
10535 // Generate the code for the opening of the data environment.
10536 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10537 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10538 // Emit device ID if any.
10539 llvm::Value *DeviceID = nullptr;
10540 if (Device) {
10541 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10542 CGF.Int64Ty, /*isSigned=*/true);
10543 } else {
10544 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10547 // Emit the number of elements in the offloading arrays.
10548 llvm::Constant *PointerNum =
10549 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10551 // Source location for the ident struct
10552 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10554 llvm::Value *OffloadingArgs[] = {RTLoc,
10555 DeviceID,
10556 PointerNum,
10557 InputInfo.BasePointersArray.getPointer(),
10558 InputInfo.PointersArray.getPointer(),
10559 InputInfo.SizesArray.getPointer(),
10560 MapTypesArray,
10561 MapNamesArray,
10562 InputInfo.MappersArray.getPointer()};
10564 // Select the right runtime function call for each standalone
10565 // directive.
10566 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10567 RuntimeFunction RTLFn;
10568 switch (D.getDirectiveKind()) {
10569 case OMPD_target_enter_data:
10570 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10571 : OMPRTL___tgt_target_data_begin_mapper;
10572 break;
10573 case OMPD_target_exit_data:
10574 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10575 : OMPRTL___tgt_target_data_end_mapper;
10576 break;
10577 case OMPD_target_update:
10578 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10579 : OMPRTL___tgt_target_data_update_mapper;
10580 break;
10581 case OMPD_parallel:
10582 case OMPD_for:
10583 case OMPD_parallel_for:
10584 case OMPD_parallel_master:
10585 case OMPD_parallel_sections:
10586 case OMPD_for_simd:
10587 case OMPD_parallel_for_simd:
10588 case OMPD_cancel:
10589 case OMPD_cancellation_point:
10590 case OMPD_ordered:
10591 case OMPD_threadprivate:
10592 case OMPD_allocate:
10593 case OMPD_task:
10594 case OMPD_simd:
10595 case OMPD_tile:
10596 case OMPD_unroll:
10597 case OMPD_sections:
10598 case OMPD_section:
10599 case OMPD_single:
10600 case OMPD_master:
10601 case OMPD_critical:
10602 case OMPD_taskyield:
10603 case OMPD_barrier:
10604 case OMPD_taskwait:
10605 case OMPD_taskgroup:
10606 case OMPD_atomic:
10607 case OMPD_flush:
10608 case OMPD_depobj:
10609 case OMPD_scan:
10610 case OMPD_teams:
10611 case OMPD_target_data:
10612 case OMPD_distribute:
10613 case OMPD_distribute_simd:
10614 case OMPD_distribute_parallel_for:
10615 case OMPD_distribute_parallel_for_simd:
10616 case OMPD_teams_distribute:
10617 case OMPD_teams_distribute_simd:
10618 case OMPD_teams_distribute_parallel_for:
10619 case OMPD_teams_distribute_parallel_for_simd:
10620 case OMPD_declare_simd:
10621 case OMPD_declare_variant:
10622 case OMPD_begin_declare_variant:
10623 case OMPD_end_declare_variant:
10624 case OMPD_declare_target:
10625 case OMPD_end_declare_target:
10626 case OMPD_declare_reduction:
10627 case OMPD_declare_mapper:
10628 case OMPD_taskloop:
10629 case OMPD_taskloop_simd:
10630 case OMPD_master_taskloop:
10631 case OMPD_master_taskloop_simd:
10632 case OMPD_parallel_master_taskloop:
10633 case OMPD_parallel_master_taskloop_simd:
10634 case OMPD_target:
10635 case OMPD_target_simd:
10636 case OMPD_target_teams_distribute:
10637 case OMPD_target_teams_distribute_simd:
10638 case OMPD_target_teams_distribute_parallel_for:
10639 case OMPD_target_teams_distribute_parallel_for_simd:
10640 case OMPD_target_teams:
10641 case OMPD_target_parallel:
10642 case OMPD_target_parallel_for:
10643 case OMPD_target_parallel_for_simd:
10644 case OMPD_requires:
10645 case OMPD_metadirective:
10646 case OMPD_unknown:
10647 default:
10648 llvm_unreachable("Unexpected standalone target data directive.");
10649 break;
10651 CGF.EmitRuntimeCall(
10652 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10653 OffloadingArgs);
10654 };
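// Illustrative mapping performed by the switch above when a nowait clause
// is present:
//   omp target enter data nowait -> __tgt_target_data_begin_nowait_mapper
//   omp target exit data nowait  -> __tgt_target_data_end_nowait_mapper
//   omp target update nowait     -> __tgt_target_data_update_nowait_mapper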
10656 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10657 &MapNamesArray](CodeGenFunction &CGF,
10658 PrePostActionTy &) {
10659 // Fill up the arrays with all the mapped variables.
10660 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10662 // Get map clause information.
10663 MappableExprsHandler MEHandler(D, CGF);
10664 MEHandler.generateAllInfo(CombinedInfo);
10666 CGOpenMPRuntime::TargetDataInfo Info;
10667 // Fill up the arrays and create the arguments.
10668 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10669 /*IsNonContiguous=*/true);
10670 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10671 D.hasClausesOfKind<OMPNowaitClause>();
10672 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10673 llvm::codegenoptions::NoDebugInfo;
10674 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10675 EmitDebug,
10676 /*ForEndCall=*/false);
10677 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10678 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10679 CGF.VoidPtrTy, CGM.getPointerAlign());
10680 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10681 CGM.getPointerAlign());
10682 InputInfo.SizesArray =
10683 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10684 InputInfo.MappersArray =
10685 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10686 MapTypesArray = Info.RTArgs.MapTypesArray;
10687 MapNamesArray = Info.RTArgs.MapNamesArray;
10688 if (RequiresOuterTask)
10689 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10690 else
10691 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10694 if (IfCond) {
10695 emitIfClause(CGF, IfCond, TargetThenGen,
10696 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10697 } else {
10698 RegionCodeGenTy ThenRCG(TargetThenGen);
10699 ThenRCG(CGF);
10703 namespace {
10704 /// Kind of parameter in a function with the 'declare simd' directive.
10705 enum ParamKindTy {
10706 Linear,
10707 LinearRef,
10708 LinearUVal,
10709 LinearVal,
10710 Uniform,
10711 Vector,
10713 /// Attribute set of the parameter.
10714 struct ParamAttrTy {
10715 ParamKindTy Kind = Vector;
10716 llvm::APSInt StrideOrArg;
10717 llvm::APSInt Alignment;
10718 bool HasVarStride = false;
10720 } // namespace
10722 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10723 ArrayRef<ParamAttrTy> ParamAttrs) {
10724 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10725 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10726 // of that clause. The VLEN value must be a power of 2.
10727 // Otherwise, the notion of the function's "characteristic data type" (CDT)
10728 // is used to compute the vector length.
10729 // CDT is defined in the following order:
10730 // a) For non-void function, the CDT is the return type.
10731 // b) If the function has any non-uniform, non-linear parameters, then the
10732 // CDT is the type of the first such parameter.
10733 // c) If the CDT determined by a) or b) above is struct, union, or class
10734 // type which is pass-by-value (except for the type that maps to the
10735 // built-in complex data type), the characteristic data type is int.
10736 // d) If none of the above three cases is applicable, the CDT is int.
10737 // The VLEN is then determined based on the CDT and the size of vector
10738 // register of that ISA for which current vector version is generated. The
10739 // VLEN is computed using the formula below:
10740 // VLEN = sizeof(vector_register) / sizeof(CDT),
10741 // where the vector register size is specified in section 3.2.1, Registers
10742 // and the Stack Frame, of the original AMD64 ABI document.
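// Worked example (illustrative): for 'double foo(double x)' without a
// simdlen clause, the CDT is double (the return type), so for a 256-bit
// vector register VLEN = 256 / 64 = 4 lanes.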
10743 QualType RetType = FD->getReturnType();
10744 if (RetType.isNull())
10745 return 0;
10746 ASTContext &C = FD->getASTContext();
10747 QualType CDT;
10748 if (!RetType.isNull() && !RetType->isVoidType()) {
10749 CDT = RetType;
10750 } else {
10751 unsigned Offset = 0;
10752 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10753 if (ParamAttrs[Offset].Kind == Vector)
10754 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10755 ++Offset;
10757 if (CDT.isNull()) {
10758 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10759 if (ParamAttrs[I + Offset].Kind == Vector) {
10760 CDT = FD->getParamDecl(I)->getType();
10761 break;
10766 if (CDT.isNull())
10767 CDT = C.IntTy;
10768 CDT = CDT->getCanonicalTypeUnqualified();
10769 if (CDT->isRecordType() || CDT->isUnionType())
10770 CDT = C.IntTy;
10771 return C.getTypeSize(CDT);
10774 /// Mangle the parameter part of the vector function name according to
10775 /// the parameters' OpenMP classification. The mangling scheme is defined in
10776 /// section 4.5 of the AAVFABI (2021Q1).
10777 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10778 SmallString<256> Buffer;
10779 llvm::raw_svector_ostream Out(Buffer);
10780 for (const auto &ParamAttr : ParamAttrs) {
10781 switch (ParamAttr.Kind) {
10782 case Linear:
10783 Out << 'l';
10784 break;
10785 case LinearRef:
10786 Out << 'R';
10787 break;
10788 case LinearUVal:
10789 Out << 'U';
10790 break;
10791 case LinearVal:
10792 Out << 'L';
10793 break;
10794 case Uniform:
10795 Out << 'u';
10796 break;
10797 case Vector:
10798 Out << 'v';
10799 break;
10801 if (ParamAttr.HasVarStride)
10802 Out << "s" << ParamAttr.StrideOrArg;
10803 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10804 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10805 // Don't print the step value if it is not present or if it is
10806 // equal to 1.
10807 if (ParamAttr.StrideOrArg < 0)
10808 Out << 'n' << -ParamAttr.StrideOrArg;
10809 else if (ParamAttr.StrideOrArg != 1)
10810 Out << ParamAttr.StrideOrArg;
10813 if (!!ParamAttr.Alignment)
10814 Out << 'a' << ParamAttr.Alignment;
10817 return std::string(Out.str());
10818 }
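// Illustrative example: for 'void foo(int *p, int n)' declared with
// '#pragma omp declare simd linear(p) uniform(n)', the step of 'p' is
// rescaled to sizeof(int) == 4 on common targets (see
// emitDeclareSimdFunction below), so the parameter part mangles as "l4u".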
10820 static void
10821 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10822 const llvm::APSInt &VLENVal,
10823 ArrayRef<ParamAttrTy> ParamAttrs,
10824 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10825 struct ISADataTy {
10826 char ISA;
10827 unsigned VecRegSize;
10829 ISADataTy ISAData[] = {
10830 {
10831 'b', 128
10832 }, // SSE
10833 {
10834 'c', 256
10835 }, // AVX
10836 {
10837 'd', 256
10838 }, // AVX2
10839 {
10840 'e', 512
10841 }, // AVX512
10842 };
10843 llvm::SmallVector<char, 2> Masked;
10844 switch (State) {
10845 case OMPDeclareSimdDeclAttr::BS_Undefined:
10846 Masked.push_back('N');
10847 Masked.push_back('M');
10848 break;
10849 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10850 Masked.push_back('N');
10851 break;
10852 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10853 Masked.push_back('M');
10854 break;
10856 for (char Mask : Masked) {
10857 for (const ISADataTy &Data : ISAData) {
10858 SmallString<256> Buffer;
10859 llvm::raw_svector_ostream Out(Buffer);
10860 Out << "_ZGV" << Data.ISA << Mask;
10861 if (!VLENVal) {
10862 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10863 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10864 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10865 } else {
10866 Out << VLENVal;
10868 Out << mangleVectorParameters(ParamAttrs);
10869 Out << '_' << Fn->getName();
10870 Fn->addFnAttr(Out.str());
10871 }
10872 }
10873 }
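// Illustrative result: '#pragma omp declare simd notinbranch simdlen(8)' on
// 'float foo(float x)' adds one attribute per ISA in ISAData, e.g.
//   _ZGVbN8v_foo  _ZGVcN8v_foo  _ZGVdN8v_foo  _ZGVeN8v_foo
// where 'N' marks the unmasked variant and 'v' the single vector parameter.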
10875 // These are the functions needed to mangle the names of the
10876 // vector functions generated by the compiler, according to the rules
10877 // defined in the "Vector Function ABI specifications for AArch64",
10878 // available at
10879 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10881 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10882 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10883 QT = QT.getCanonicalType();
10885 if (QT->isVoidType())
10886 return false;
10888 if (Kind == ParamKindTy::Uniform)
10889 return false;
10891 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10892 return false;
10894 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10895 !QT->isReferenceType())
10896 return false;
10898 return true;
10901 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10902 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10903 QT = QT.getCanonicalType();
10904 unsigned Size = C.getTypeSize(QT);
10906 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10907 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10908 return false;
10910 if (QT->isFloatingType())
10911 return true;
10913 if (QT->isIntegerType())
10914 return true;
10916 if (QT->isPointerType())
10917 return true;
10919 // TODO: Add support for complex types (section 3.1.2, item 2).
10921 return false;
10924 /// Computes the lane size (LS) of a return type or of an input parameter,
10925 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10926 /// TODO: Add support for references, section 3.2.1, item 1.
10927 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10928 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10929 QualType PTy = QT.getCanonicalType()->getPointeeType();
10930 if (getAArch64PBV(PTy, C))
10931 return C.getTypeSize(PTy);
10933 if (getAArch64PBV(QT, C))
10934 return C.getTypeSize(QT);
10936 return C.getTypeSize(C.getUIntPtrType());
10939 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10940 // signature of the scalar function, as defined in 3.2.2 of the
10941 // AAVFABI.
10942 static std::tuple<unsigned, unsigned, bool>
10943 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10944 QualType RetType = FD->getReturnType().getCanonicalType();
10946 ASTContext &C = FD->getASTContext();
10948 bool OutputBecomesInput = false;
10950 llvm::SmallVector<unsigned, 8> Sizes;
10951 if (!RetType->isVoidType()) {
10952 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10953 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10954 OutputBecomesInput = true;
10956 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10957 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10958 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10961 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10962 // The LS of a function parameter / return value can only be a power
10963 // of 2, starting from 8 bits, up to 128.
10964 assert(llvm::all_of(Sizes,
10965 [](unsigned Size) {
10966 return Size == 8 || Size == 16 || Size == 32 ||
10967 Size == 64 || Size == 128;
10968 }) &&
10969 "Invalid size");
10971 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10972 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10973 OutputBecomesInput);
10974 }
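// Illustrative example: for 'double foo(float x)' the lane sizes are
// {64, 32}, giving NDS = 32 and WDS = 64; double is pass-by-value, so
// OutputBecomesInput stays false.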
10976 // Function used to add the attribute. The parameter `VLEN` is
10977 // templated to allow the use of "x" when targeting scalable functions
10978 // for SVE.
10979 template <typename T>
10980 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10981 char ISA, StringRef ParSeq,
10982 StringRef MangledName, bool OutputBecomesInput,
10983 llvm::Function *Fn) {
10984 SmallString<256> Buffer;
10985 llvm::raw_svector_ostream Out(Buffer);
10986 Out << Prefix << ISA << LMask << VLEN;
10987 if (OutputBecomesInput)
10988 Out << "v";
10989 Out << ParSeq << "_" << MangledName;
10990 Fn->addFnAttr(Out.str());
10993 // Helper function to generate the Advanced SIMD names depending on
10994 // the value of the NDS when simdlen is not present.
10995 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10996 StringRef Prefix, char ISA,
10997 StringRef ParSeq, StringRef MangledName,
10998 bool OutputBecomesInput,
10999 llvm::Function *Fn) {
11000 switch (NDS) {
11001 case 8:
11002 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11003 OutputBecomesInput, Fn);
11004 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11005 OutputBecomesInput, Fn);
11006 break;
11007 case 16:
11008 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11009 OutputBecomesInput, Fn);
11010 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11011 OutputBecomesInput, Fn);
11012 break;
11013 case 32:
11014 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11015 OutputBecomesInput, Fn);
11016 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11017 OutputBecomesInput, Fn);
11018 break;
11019 case 64:
11020 case 128:
11021 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11022 OutputBecomesInput, Fn);
11023 break;
11024 default:
11025 llvm_unreachable("Scalar type is too wide.");
11029 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11030 static void emitAArch64DeclareSimdFunction(
11031 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11032 ArrayRef<ParamAttrTy> ParamAttrs,
11033 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11034 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11036 // Get basic data for building the vector signature.
11037 const auto Data = getNDSWDS(FD, ParamAttrs);
11038 const unsigned NDS = std::get<0>(Data);
11039 const unsigned WDS = std::get<1>(Data);
11040 const bool OutputBecomesInput = std::get<2>(Data);
11042 // Check the values provided via `simdlen` by the user.
11043 // 1. A `simdlen(1)` doesn't produce vector signatures.
11044 if (UserVLEN == 1) {
11045 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11046 DiagnosticsEngine::Warning,
11047 "The clause simdlen(1) has no effect when targeting aarch64.");
11048 CGM.getDiags().Report(SLoc, DiagID);
11049 return;
11052 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11053 // Advanced SIMD output.
11054 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11055 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11056 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11057 "power of 2 when targeting Advanced SIMD.");
11058 CGM.getDiags().Report(SLoc, DiagID);
11059 return;
11062 // 3. Section 3.4.1: SVE fixed-length vectors must obey the architectural
11063 // limits.
11064 if (ISA == 's' && UserVLEN != 0) {
11065 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11066 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11067 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11068 "lanes in the architectural constraints "
11069 "for SVE (min is 128-bit, max is "
11070 "2048-bit, by steps of 128-bit)");
11071 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11072 return;
11076 // Sort out parameter sequence.
11077 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11078 StringRef Prefix = "_ZGV";
11079 // Generate simdlen from user input (if any).
11080 if (UserVLEN) {
11081 if (ISA == 's') {
11082 // SVE generates only a masked function.
11083 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11084 OutputBecomesInput, Fn);
11085 } else {
11086 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11087 // Advanced SIMD generates one or two functions, depending on
11088 // the `[not]inbranch` clause.
11089 switch (State) {
11090 case OMPDeclareSimdDeclAttr::BS_Undefined:
11091 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11092 OutputBecomesInput, Fn);
11093 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11094 OutputBecomesInput, Fn);
11095 break;
11096 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11097 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11098 OutputBecomesInput, Fn);
11099 break;
11100 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11101 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11102 OutputBecomesInput, Fn);
11103 break;
11106 } else {
11107 // If no user simdlen is provided, follow the AAVFABI rules for
11108 // generating the vector length.
11109 if (ISA == 's') {
11110 // SVE, section 3.4.1, item 1.
11111 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11112 OutputBecomesInput, Fn);
11113 } else {
11114 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11115 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11116 // two vector names depending on the use of the clause
11117 // `[not]inbranch`.
11118 switch (State) {
11119 case OMPDeclareSimdDeclAttr::BS_Undefined:
11120 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11121 OutputBecomesInput, Fn);
11122 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11123 OutputBecomesInput, Fn);
11124 break;
11125 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11126 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11127 OutputBecomesInput, Fn);
11128 break;
11129 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11130 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11131 OutputBecomesInput, Fn);
11132 break;
11138 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11139 llvm::Function *Fn) {
11140 ASTContext &C = CGM.getContext();
11141 FD = FD->getMostRecentDecl();
11142 while (FD) {
11143 // Map params to their positions in function decl.
11144 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11145 if (isa<CXXMethodDecl>(FD))
11146 ParamPositions.try_emplace(FD, 0);
11147 unsigned ParamPos = ParamPositions.size();
11148 for (const ParmVarDecl *P : FD->parameters()) {
11149 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11150 ++ParamPos;
11152 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11153 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11154 // Mark uniform parameters.
11155 for (const Expr *E : Attr->uniforms()) {
11156 E = E->IgnoreParenImpCasts();
11157 unsigned Pos;
11158 if (isa<CXXThisExpr>(E)) {
11159 Pos = ParamPositions[FD];
11160 } else {
11161 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11162 ->getCanonicalDecl();
11163 auto It = ParamPositions.find(PVD);
11164 assert(It != ParamPositions.end() && "Function parameter not found");
11165 Pos = It->second;
11167 ParamAttrs[Pos].Kind = Uniform;
11169 // Get alignment info.
11170 auto *NI = Attr->alignments_begin();
11171 for (const Expr *E : Attr->aligneds()) {
11172 E = E->IgnoreParenImpCasts();
11173 unsigned Pos;
11174 QualType ParmTy;
11175 if (isa<CXXThisExpr>(E)) {
11176 Pos = ParamPositions[FD];
11177 ParmTy = E->getType();
11178 } else {
11179 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11180 ->getCanonicalDecl();
11181 auto It = ParamPositions.find(PVD);
11182 assert(It != ParamPositions.end() && "Function parameter not found");
11183 Pos = It->second;
11184 ParmTy = PVD->getType();
11186 ParamAttrs[Pos].Alignment =
11187 (*NI)
11188 ? (*NI)->EvaluateKnownConstInt(C)
11189 : llvm::APSInt::getUnsigned(
11190 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11191 .getQuantity());
11192 ++NI;
11194 // Mark linear parameters.
11195 auto *SI = Attr->steps_begin();
11196 auto *MI = Attr->modifiers_begin();
11197 for (const Expr *E : Attr->linears()) {
11198 E = E->IgnoreParenImpCasts();
11199 unsigned Pos;
11200 bool IsReferenceType = false;
11201 // Rescaling factor needed to compute the linear parameter
11202 // value in the mangled name.
11203 unsigned PtrRescalingFactor = 1;
11204 if (isa<CXXThisExpr>(E)) {
11205 Pos = ParamPositions[FD];
11206 auto *P = cast<PointerType>(E->getType());
11207 PtrRescalingFactor = CGM.getContext()
11208 .getTypeSizeInChars(P->getPointeeType())
11209 .getQuantity();
11210 } else {
11211 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11212 ->getCanonicalDecl();
11213 auto It = ParamPositions.find(PVD);
11214 assert(It != ParamPositions.end() && "Function parameter not found");
11215 Pos = It->second;
11216 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11217 PtrRescalingFactor = CGM.getContext()
11218 .getTypeSizeInChars(P->getPointeeType())
11219 .getQuantity();
11220 else if (PVD->getType()->isReferenceType()) {
11221 IsReferenceType = true;
11222 PtrRescalingFactor =
11223 CGM.getContext()
11224 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11225 .getQuantity();
11228 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11229 if (*MI == OMPC_LINEAR_ref)
11230 ParamAttr.Kind = LinearRef;
11231 else if (*MI == OMPC_LINEAR_uval)
11232 ParamAttr.Kind = LinearUVal;
11233 else if (IsReferenceType)
11234 ParamAttr.Kind = LinearVal;
11235 else
11236 ParamAttr.Kind = Linear;
11237 // Assume a stride of 1 for `linear` without modifiers.
11238 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11239 if (*SI) {
11240 Expr::EvalResult Result;
11241 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11242 if (const auto *DRE =
11243 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11244 if (const auto *StridePVD =
11245 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11246 ParamAttr.HasVarStride = true;
11247 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11248 assert(It != ParamPositions.end() &&
11249 "Function parameter not found");
11250 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11253 } else {
11254 ParamAttr.StrideOrArg = Result.Val.getInt();
11257 // If we are using a linear clause on a pointer, we need to
11258 // rescale the value of linear_step by the byte size of the
11259 // pointee type.
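// Illustrative example: 'linear(p:2)' on 'double *p' yields a mangled
// step of 2 * sizeof(double) == 16 (assuming an 8-byte double).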
11260 if (!ParamAttr.HasVarStride &&
11261 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11262 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11263 ++SI;
11264 ++MI;
11266 llvm::APSInt VLENVal;
11267 SourceLocation ExprLoc;
11268 const Expr *VLENExpr = Attr->getSimdlen();
11269 if (VLENExpr) {
11270 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11271 ExprLoc = VLENExpr->getExprLoc();
11273 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11274 if (CGM.getTriple().isX86()) {
11275 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11276 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11277 unsigned VLEN = VLENVal.getExtValue();
11278 StringRef MangledName = Fn->getName();
11279 if (CGM.getTarget().hasFeature("sve"))
11280 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11281 MangledName, 's', 128, Fn, ExprLoc);
11282 else if (CGM.getTarget().hasFeature("neon"))
11283 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11284 MangledName, 'n', 128, Fn, ExprLoc);
11287 FD = FD->getPreviousDecl();
11291 namespace {
11292 /// Cleanup action for doacross support.
11293 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11294 public:
11295 static const int DoacrossFinArgs = 2;
11297 private:
11298 llvm::FunctionCallee RTLFn;
11299 llvm::Value *Args[DoacrossFinArgs];
11301 public:
11302 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11303 ArrayRef<llvm::Value *> CallArgs)
11304 : RTLFn(RTLFn) {
11305 assert(CallArgs.size() == DoacrossFinArgs);
11306 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11308 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11309 if (!CGF.HaveInsertPoint())
11310 return;
11311 CGF.EmitRuntimeCall(RTLFn, Args);
11314 } // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info cast to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}

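// Illustrative only: inside the ordered(n) loop body,
//   #pragma omp ordered depend(sink : vec)  -> __kmpc_doacross_wait
//   #pragma omp ordered depend(source)      -> __kmpc_doacross_post
// with the dependence vector packed into an array of kmp_int64.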
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

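// Illustrative only: for a variable annotated with, e.g.,
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc) align(64)
// getAddressOfLocalVariable below replaces the ordinary alloca with calls to
// the runtime allocation entry points, roughly
//   void *__kmpc_alloc(kmp_int32 gtid, size_t sz, omp_allocator_handle_t al);
//   void *__kmpc_aligned_alloc(kmp_int32 gtid, size_t algn, size_t sz,
//                              omp_allocator_handle_t al);
// and schedules the matching __kmpc_free as a normal-and-EH cleanup.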
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

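// Illustrative only: the lastprivate(conditional:) support below implements
// OpenMP 5.0 semantics for clauses such as
//   #pragma omp for lastprivate(conditional: x)
// where the sequentially last iteration that actually assigns to `x`
// determines the final value, so every store to `x` must be tracked.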
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

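// Illustrative only: for a conditional lastprivate `a`, the private copy is
// wrapped in an implicit record, roughly
//   struct lastprivate.conditional { <decl-type> a; char Fired; };
// `Fired` is zero-initialized here and set by stores in inner regions; it is
// tested in checkAndEmitSharedLastprivateConditional before the value is
// copied back.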
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

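// Illustrative only: given
//   #pragma omp simd lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i)
//     if (c[i]) a = b[i];
// every assignment to `a` funnels its LHS through
// checkAndEmitLastprivateConditional below, which either performs the guarded
// `last_iv/last_a` update in the current function or, from an inner parallel
// region, atomically sets the `Fired` flag in the wrapper struct.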
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region; exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

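// CGOpenMPSIMDRuntime implements the OpenMP interface for -fopenmp-simd
// compilations, where only simd-related constructs are lowered and no libomp
// runtime calls may be emitted. The overrides below are therefore expected to
// be unreachable; anything legitimately needed in SIMD-only mode (e.g. simple
// reductions) forwards to the base class.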
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");