//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
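// Illustrative note (editor's sketch, not upstream text): these values mirror
// the KMP_IDENT_* flags in the libomp kmp.h linked above. For example, the
// implicit barrier emitted at the end of a worksharing 'for' region is tagged
// with OMP_IDENT_BARRIER_IMPL_FOR in the ident_t flags field (see
// emitBarrierCall later in this file).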
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
} // anonymous namespace
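// Illustrative example (editor's note): '#pragma omp requires
// unified_shared_memory' is recorded as OMP_REQ_UNIFIED_SHARED_MEMORY; the
// CGOpenMPRuntime constructor below feeds the corresponding
// hasRequiresUnifiedSharedMemory() query into the OpenMPIRBuilderConfig.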
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
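// Illustrative note (editor's sketch): the psource field uses the layout
// ";file;function;line;column;;"; getIdentStringFromSourceLocation() below
// assembles exactly this form.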
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
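// Illustrative example (editor's note): 'schedule(monotonic: dynamic, 4)'
// lowers to OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic; the chunk
// size (4) travels as a separate argument to the dispatch-init runtime entry.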
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice,
                                     isGPU(), hasRequiresUnifiedSharedMemory(),
                                     CGM.getLangOpts().OpenMPOffloadMandatory);
  OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsTargetDevice
                            ? CGM.getLangOpts().OMPHostIRFile
                            : StringRef{});
  OMPBuilder.setConfig(Config);
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.

    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
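// Illustrative note: for untied tasks, every task-switching point added by
// UntiedTaskActionTy above becomes one case of the '.untied.jmp.' switch, and
// NumberOfParts reports that case count back to the caller.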
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
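// Illustrative example (editor's note): a directive at t.c:3:9 inside
// void foo() yields ";t.c;foo;3;9;;"; an invalid location instead gets the
// OpenMPIRBuilder default string ";unknown;unknown;0;0;;" (see
// emitUpdateLocation below).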
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
1402 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1403 SourceLocation Loc) {
1404 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1405 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1406 // the clang invariants used below might be broken.
1407 if (CGM.getLangOpts().OpenMPIRBuilder) {
1408 SmallString<128> Buffer;
1409 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1410 uint32_t SrcLocStrSize;
1411 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1412 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1413 return OMPBuilder.getOrCreateThreadID(
1414 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1417 llvm::Value *ThreadID = nullptr;
1418 // Check whether we've already cached a load of the thread id in this
1419 // function.
1420 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1421 if (I != OpenMPLocThreadIDMap.end()) {
1422 ThreadID = I->second.ThreadID;
1423 if (ThreadID != nullptr)
1424 return ThreadID;
1426 // If exceptions are enabled, do not use parameter to avoid possible crash.
1427 if (auto *OMPRegionInfo =
1428 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1429 if (OMPRegionInfo->getThreadIDVariable()) {
1430 // Check if this an outlined function with thread id passed as argument.
1431 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1432 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1433 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1434 !CGF.getLangOpts().CXXExceptions ||
1435 CGF.Builder.GetInsertBlock() == TopBlock ||
1436 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1437 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1438 TopBlock ||
1439 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1440 CGF.Builder.GetInsertBlock()) {
1441 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1442 // If value loaded in entry block, cache it and use it everywhere in
1443 // function.
1444 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1445 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1446 Elem.second.ThreadID = ThreadID;
1448 return ThreadID;
1453 // This is not an outlined function region - we need to call kmp_int32
1454 // __kmpc_global_thread_num(ident_t *loc).
1455 // Generate the thread id value and cache it for use across the
1456 // function.
1457 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1458 if (!Elem.second.ServiceInsertPt)
1459 setLocThreadIdInsertPt(CGF);
1460 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1461 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1462 llvm::CallInst *Call = CGF.Builder.CreateCall(
1463 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1464 OMPRTL___kmpc_global_thread_num),
1465 emitUpdateLocation(CGF, Loc));
1466 Call->setCallingConv(CGF.getRuntimeCC());
1467 Elem.second.ThreadID = Call;
1468 return Call;
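// Illustrative sketch (assumption): in a plain serial function the map lookup
// above misses, so the first getThreadID call emits, at the service insertion
// point near the function entry, roughly
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
// and every later query in the same function reuses the cached %gtid. Inside
// an outlined region the id is instead loaded from the "kmp_int32 *gtid"
// parameter exposed by the region info.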
1471 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1472 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1473 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1474 clearLocThreadIdInsertPt(CGF);
1475 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1477 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1478 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1479 UDRMap.erase(D);
1480 FunctionUDRMap.erase(CGF.CurFn);
1482 auto I = FunctionUDMMap.find(CGF.CurFn);
1483 if (I != FunctionUDMMap.end()) {
1484 for(const auto *D : I->second)
1485 UDMMap.erase(D);
1486 FunctionUDMMap.erase(I);
1488 LastprivateConditionalToTypes.erase(CGF.CurFn);
1489 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1492 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1493 return OMPBuilder.IdentPtr;
1496 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1497 if (!Kmpc_MicroTy) {
1498 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1499 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1500 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1501 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1503 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1506 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1507 convertDeviceClause(const VarDecl *VD) {
1508 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1509 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1510 if (!DevTy)
1511 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1513 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1514 case OMPDeclareTargetDeclAttr::DT_Host:
1515 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1516 break;
1517 case OMPDeclareTargetDeclAttr::DT_NoHost:
1518 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1519 break;
1520 case OMPDeclareTargetDeclAttr::DT_Any:
1521 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1522 break;
1523 default:
1524 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1525 break;
1529 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1530 convertCaptureClause(const VarDecl *VD) {
1531 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1532 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1533 if (!MapType)
1534 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1535 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1536 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1537 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1538 break;
1539 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1540 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1541 break;
1542 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1543 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1544 break;
1545 default:
1546 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1547 break;
1551 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1552 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1553 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1555 auto FileInfoCallBack = [&]() {
1556 SourceManager &SM = CGM.getContext().getSourceManager();
1557 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1559 llvm::sys::fs::UniqueID ID;
1560 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1561 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1564 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1567 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1570 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1571 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1573 auto LinkageForVariable = [&VD, this]() {
1574 return CGM.getLLVMLinkageVarDefinition(VD);
1577 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1579 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1580 CGM.getContext().getPointerType(VD->getType()));
1581 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1582 convertCaptureClause(VD), convertDeviceClause(VD),
1583 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1584 VD->isExternallyVisible(),
1585 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1586 VD->getCanonicalDecl()->getBeginLoc()),
1587 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1588 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1589 LinkageForVariable);
1591 if (!addr)
1592 return Address::invalid();
1593 return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1596 llvm::Constant *
1597 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1598 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1599 !CGM.getContext().getTargetInfo().isTLSSupported());
1600 // Lookup the entry, lazily creating it if necessary.
1601 std::string Suffix = getName({"cache", ""});
1602 return OMPBuilder.getOrCreateInternalVariable(
1603 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1606 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1607 const VarDecl *VD,
1608 Address VDAddr,
1609 SourceLocation Loc) {
1610 if (CGM.getLangOpts().OpenMPUseTLS &&
1611 CGM.getContext().getTargetInfo().isTLSSupported())
1612 return VDAddr;
1614 llvm::Type *VarTy = VDAddr.getElementType();
1615 llvm::Value *Args[] = {
1616 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1617 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1618 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1619 getOrCreateThreadPrivateCache(VD)};
1620 return Address(
1621 CGF.EmitRuntimeCall(
1622 OMPBuilder.getOrCreateRuntimeFunction(
1623 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1624 Args),
1625 CGF.Int8Ty, VDAddr.getAlignment());
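// Illustrative sketch, not upstream code: without TLS support,
//   int x;
//   #pragma omp threadprivate(x)
// makes accesses of x go through the runtime, roughly
//   %p = call ptr @__kmpc_threadprivate_cached(ptr @loc, i32 %gtid,
//                                              ptr @x, i64 4, ptr @x.cache)
// where @x.cache stands for the internal "<mangled-name>.cache." variable
// created by getOrCreateThreadPrivateCache above.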
1628 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1629 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1630 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1631 // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
1632 // runtime library.
1633 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1634 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1635 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1636 OMPLoc);
1637 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1638 // to register the constructor/destructor for the variable.
1639 llvm::Value *Args[] = {
1640 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1641 Ctor, CopyCtor, Dtor};
1642 CGF.EmitRuntimeCall(
1643 OMPBuilder.getOrCreateRuntimeFunction(
1644 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1645 Args);
1648 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1649 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1650 bool PerformInit, CodeGenFunction *CGF) {
1651 if (CGM.getLangOpts().OpenMPUseTLS &&
1652 CGM.getContext().getTargetInfo().isTLSSupported())
1653 return nullptr;
1655 VD = VD->getDefinition(CGM.getContext());
1656 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1657 QualType ASTTy = VD->getType();
1659 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1660 const Expr *Init = VD->getAnyInitializer();
1661 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1662 // Generate a function that re-emits the declaration's initializer into the
1663 // threadprivate copy of the variable VD.
1664 CodeGenFunction CtorCGF(CGM);
1665 FunctionArgList Args;
1666 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1667 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1668 ImplicitParamDecl::Other);
1669 Args.push_back(&Dst);
1671 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1672 CGM.getContext().VoidPtrTy, Args);
1673 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1674 std::string Name = getName({"__kmpc_global_ctor_", ""});
1675 llvm::Function *Fn =
1676 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1677 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1678 Args, Loc, Loc);
1679 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1680 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1681 CGM.getContext().VoidPtrTy, Dst.getLocation());
1682 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1683 VDAddr.getAlignment());
1684 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1685 /*IsInitializer=*/true);
1686 ArgVal = CtorCGF.EmitLoadOfScalar(
1687 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1688 CGM.getContext().VoidPtrTy, Dst.getLocation());
1689 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1690 CtorCGF.FinishFunction();
1691 Ctor = Fn;
1693 if (VD->getType().isDestructedType() != QualType::DK_none) {
1694 // Generate a function that emits the destructor call for the threadprivate
1695 // copy of the variable VD.
1696 CodeGenFunction DtorCGF(CGM);
1697 FunctionArgList Args;
1698 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1699 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1700 ImplicitParamDecl::Other);
1701 Args.push_back(&Dst);
1703 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1704 CGM.getContext().VoidTy, Args);
1705 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1706 std::string Name = getName({"__kmpc_global_dtor_", ""});
1707 llvm::Function *Fn =
1708 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1709 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1710 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1711 Loc, Loc);
1712 // Create a scope with an artificial location for the body of this function.
1713 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1714 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1715 DtorCGF.GetAddrOfLocalVar(&Dst),
1716 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1717 DtorCGF.emitDestroy(
1718 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1719 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1720 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1721 DtorCGF.FinishFunction();
1722 Dtor = Fn;
1724 // Do not emit init function if it is not required.
1725 if (!Ctor && !Dtor)
1726 return nullptr;
1728 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1729 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1730 /*isVarArg=*/false)
1731 ->getPointerTo();
1732 // Copying constructor for the threadprivate variable.
1733 // Must be NULL: the slot is reserved by the runtime, which currently
1734 // requires this parameter to always be NULL; otherwise it fires an assertion.
1735 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1736 if (Ctor == nullptr) {
1737 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1738 /*isVarArg=*/false)
1739 ->getPointerTo();
1740 Ctor = llvm::Constant::getNullValue(CtorTy);
1742 if (Dtor == nullptr) {
1743 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1744 /*isVarArg=*/false)
1745 ->getPointerTo();
1746 Dtor = llvm::Constant::getNullValue(DtorTy);
1748 if (!CGF) {
1749 auto *InitFunctionTy =
1750 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1751 std::string Name = getName({"__omp_threadprivate_init_", ""});
1752 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1753 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1754 CodeGenFunction InitCGF(CGM);
1755 FunctionArgList ArgList;
1756 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1757 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1758 Loc, Loc);
1759 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1760 InitCGF.FinishFunction();
1761 return InitFunction;
1763 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1765 return nullptr;
1768 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1769 llvm::GlobalVariable *Addr,
1770 bool PerformInit) {
1771 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1772 !CGM.getLangOpts().OpenMPIsTargetDevice)
1773 return false;
1774 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1775 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1776 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1777 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1778 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1779 HasRequiresUnifiedSharedMemory))
1780 return CGM.getLangOpts().OpenMPIsTargetDevice;
1781 VD = VD->getDefinition(CGM.getContext());
1782 assert(VD && "Unknown VarDecl");
1784 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1785 return CGM.getLangOpts().OpenMPIsTargetDevice;
1787 QualType ASTTy = VD->getType();
1788 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1790 // Produce the unique prefix to identify the new target regions. We use
1791 // the source location of the variable declaration, which we know does not
1792 // conflict with any target region.
1793 llvm::TargetRegionEntryInfo EntryInfo =
1794 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
1795 SmallString<128> Buffer, Out;
1796 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1798 const Expr *Init = VD->getAnyInitializer();
1799 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1800 llvm::Constant *Ctor;
1801 llvm::Constant *ID;
1802 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1803 // Generate a function that re-emits the declaration's initializer into
1804 // the threadprivate copy of the variable VD.
1805 CodeGenFunction CtorCGF(CGM);
1807 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1808 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1809 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1810 FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1811 llvm::GlobalValue::WeakODRLinkage);
1812 Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1813 if (CGM.getTriple().isAMDGCN())
1814 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1815 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1816 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1817 FunctionArgList(), Loc, Loc);
1818 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1819 llvm::Constant *AddrInAS0 = Addr;
1820 if (Addr->getAddressSpace() != 0)
1821 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1822 Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1823 CtorCGF.EmitAnyExprToMem(Init,
1824 Address(AddrInAS0, Addr->getValueType(),
1825 CGM.getContext().getDeclAlign(VD)),
1826 Init->getType().getQualifiers(),
1827 /*IsInitializer=*/true);
1828 CtorCGF.FinishFunction();
1829 Ctor = Fn;
1830 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1831 } else {
1832 Ctor = new llvm::GlobalVariable(
1833 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1834 llvm::GlobalValue::PrivateLinkage,
1835 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1836 ID = Ctor;
1839 // Register the information for the entry associated with the constructor.
1840 Out.clear();
1841 auto CtorEntryInfo = EntryInfo;
1842 CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1843 OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1844 CtorEntryInfo, Ctor, ID,
1845 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
1847 if (VD->getType().isDestructedType() != QualType::DK_none) {
1848 llvm::Constant *Dtor;
1849 llvm::Constant *ID;
1850 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1851 // Generate a function that emits the destructor call for the threadprivate
1852 // copy of the variable VD.
1853 CodeGenFunction DtorCGF(CGM);
1855 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1856 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1857 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1858 FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1859 llvm::GlobalValue::WeakODRLinkage);
1860 Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1861 if (CGM.getTriple().isAMDGCN())
1862 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1863 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1864 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1865 FunctionArgList(), Loc, Loc);
1866 // Create a scope with an artificial location for the body of this
1867 // function.
1868 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1869 llvm::Constant *AddrInAS0 = Addr;
1870 if (Addr->getAddressSpace() != 0)
1871 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1872 Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1873 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1874 CGM.getContext().getDeclAlign(VD)),
1875 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1876 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1877 DtorCGF.FinishFunction();
1878 Dtor = Fn;
1879 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1880 } else {
1881 Dtor = new llvm::GlobalVariable(
1882 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1883 llvm::GlobalValue::PrivateLinkage,
1884 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1885 ID = Dtor;
1887 // Register the information for the entry associated with the destructor.
1888 Out.clear();
1889 auto DtorEntryInfo = EntryInfo;
1890 DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1891 OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1892 DtorEntryInfo, Dtor, ID,
1893 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
1895 return CGM.getLangOpts().OpenMPIsTargetDevice;
1898 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1899 llvm::GlobalValue *GV) {
1900 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1901 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1903 // We only need to handle active 'indirect' declare target functions.
1904 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1905 return;
1907 // Get a mangled name to store the new device global in.
1908 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1909 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1910 SmallString<128> Name;
1911 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1913 // We need to generate a new global to hold the address of the indirectly
1914 // called device function. Doing this allows us to keep the visibility and
1915 // linkage of the associated function unchanged while allowing the runtime to
1916 // access its value.
1917 llvm::GlobalValue *Addr = GV;
1918 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1919 Addr = new llvm::GlobalVariable(
1920 CGM.getModule(), CGM.VoidPtrTy,
1921 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1922 nullptr, llvm::GlobalValue::NotThreadLocal,
1923 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1924 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1927 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1928 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1929 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1930 llvm::GlobalValue::WeakODRLinkage);
1933 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1934 QualType VarType,
1935 StringRef Name) {
1936 std::string Suffix = getName({"artificial", ""});
1937 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1938 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1939 VarLVType, Twine(Name).concat(Suffix).str());
1940 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1941 CGM.getTarget().isTLSSupported()) {
1942 GAddr->setThreadLocal(/*Val=*/true);
1943 return Address(GAddr, GAddr->getValueType(),
1944 CGM.getContext().getTypeAlignInChars(VarType));
1946 std::string CacheSuffix = getName({"cache", ""});
1947 llvm::Value *Args[] = {
1948 emitUpdateLocation(CGF, SourceLocation()),
1949 getThreadID(CGF, SourceLocation()),
1950 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1951 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1952 /*isSigned=*/false),
1953 OMPBuilder.getOrCreateInternalVariable(
1954 CGM.VoidPtrPtrTy,
1955 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1956 return Address(
1957 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1958 CGF.EmitRuntimeCall(
1959 OMPBuilder.getOrCreateRuntimeFunction(
1960 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1961 Args),
1962 VarLVType->getPointerTo(/*AddrSpace=*/0)),
1963 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1966 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1967 const RegionCodeGenTy &ThenGen,
1968 const RegionCodeGenTy &ElseGen) {
1969 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1971 // If the condition constant folds and can be elided, try to avoid emitting
1972 // the condition and the dead arm of the if/else.
1973 bool CondConstant;
1974 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1975 if (CondConstant)
1976 ThenGen(CGF);
1977 else
1978 ElseGen(CGF);
1979 return;
1982 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1983 // emit the conditional branch.
1984 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1985 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1986 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1987 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1989 // Emit the 'then' code.
1990 CGF.EmitBlock(ThenBlock);
1991 ThenGen(CGF);
1992 CGF.EmitBranch(ContBlock);
1993 // Emit the 'else' code if present.
1994 // There is no need to emit a line number for an unconditional branch.
1995 (void)ApplyDebugLocation::CreateEmpty(CGF);
1996 CGF.EmitBlock(ElseBlock);
1997 ElseGen(CGF);
1998 // There is no need to emit a line number for an unconditional branch.
1999 (void)ApplyDebugLocation::CreateEmpty(CGF);
2000 CGF.EmitBranch(ContBlock);
2001 // Emit the continuation block for code after the if.
2002 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
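// Illustrative sketch (assumption): for a non-constant condition this emits
// the usual diamond
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// with both arms falling through to %omp_if.end, while a condition that
// constant-folds (e.g. if(1)) emits only the live arm, per the early return
// above.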
2005 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2006 llvm::Function *OutlinedFn,
2007 ArrayRef<llvm::Value *> CapturedVars,
2008 const Expr *IfCond,
2009 llvm::Value *NumThreads) {
2010 if (!CGF.HaveInsertPoint())
2011 return;
2012 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2013 auto &M = CGM.getModule();
2014 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2015 this](CodeGenFunction &CGF, PrePostActionTy &) {
2016 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2017 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2018 llvm::Value *Args[] = {
2019 RTLoc,
2020 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2021 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2022 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2023 RealArgs.append(std::begin(Args), std::end(Args));
2024 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2026 llvm::FunctionCallee RTLFn =
2027 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2028 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2030 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2031 this](CodeGenFunction &CGF, PrePostActionTy &) {
2032 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2033 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2034 // Build calls:
2035 // __kmpc_serialized_parallel(&Loc, GTid);
2036 llvm::Value *Args[] = {RTLoc, ThreadID};
2037 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2038 M, OMPRTL___kmpc_serialized_parallel),
2039 Args);
2041 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2042 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2043 Address ZeroAddrBound =
2044 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2045 /*Name=*/".bound.zero.addr");
2046 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2047 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2048 // ThreadId for serialized parallels is 0.
2049 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2050 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2051 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2053 // Ensure we do not inline the function. This is trivially true for the ones
2054 // passed to __kmpc_fork_call, but the ones called in serialized regions
2055 // could be inlined. This is not perfect, but it is closer to the invariant
2056 // we want, namely, that every data environment starts with a new function.
2057 // TODO: We should pass the if condition to the runtime function and do the
2058 // handling there. Much cleaner code.
2059 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2060 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2061 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2063 // __kmpc_end_serialized_parallel(&Loc, GTid);
2064 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2065 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2066 M, OMPRTL___kmpc_end_serialized_parallel),
2067 EndArgs);
2069 if (IfCond) {
2070 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2071 } else {
2072 RegionCodeGenTy ThenRCG(ThenGen);
2073 ThenRCG(CGF);
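// Illustrative sketch, not upstream code: for
//   #pragma omp parallel if(n > 1)
// the 'then' arm forks the team:
//   call void @__kmpc_fork_call(ptr @loc, i32 <ncaptures>, ptr @outlined, ...)
// while the 'else' arm runs the region serialized on the current thread:
//   call void @__kmpc_serialized_parallel(ptr @loc, i32 %gtid)
//   call void @outlined(ptr %gtid.addr, ptr %.bound.zero.addr, ...)
//   call void @__kmpc_end_serialized_parallel(ptr @loc, i32 %gtid)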
2077 // If we're inside an (outlined) parallel region, use the region info's
2078 // thread-ID variable (it is passed as the first argument of the outlined
2079 // function as "kmp_int32 *gtid"). Otherwise, if we're in a regular serial code
2080 // region, get the thread ID by calling kmp_int32
2081 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary,
2082 // and return the address of that temporary.
2083 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2084 SourceLocation Loc) {
2085 if (auto *OMPRegionInfo =
2086 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2087 if (OMPRegionInfo->getThreadIDVariable())
2088 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2090 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2091 QualType Int32Ty =
2092 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2093 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2094 CGF.EmitStoreOfScalar(ThreadID,
2095 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2097 return ThreadIDTemp;
2100 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2101 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2102 std::string Name = getName({Prefix, "var"});
2103 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2106 namespace {
2107 /// Common pre(post)-action for different OpenMP constructs.
2108 class CommonActionTy final : public PrePostActionTy {
2109 llvm::FunctionCallee EnterCallee;
2110 ArrayRef<llvm::Value *> EnterArgs;
2111 llvm::FunctionCallee ExitCallee;
2112 ArrayRef<llvm::Value *> ExitArgs;
2113 bool Conditional;
2114 llvm::BasicBlock *ContBlock = nullptr;
2116 public:
2117 CommonActionTy(llvm::FunctionCallee EnterCallee,
2118 ArrayRef<llvm::Value *> EnterArgs,
2119 llvm::FunctionCallee ExitCallee,
2120 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2121 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2122 ExitArgs(ExitArgs), Conditional(Conditional) {}
2123 void Enter(CodeGenFunction &CGF) override {
2124 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2125 if (Conditional) {
2126 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2127 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2128 ContBlock = CGF.createBasicBlock("omp_if.end");
2129 // Generate the branch (If-stmt)
2130 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2131 CGF.EmitBlock(ThenBlock);
2134 void Done(CodeGenFunction &CGF) {
2135 // Emit the rest of the blocks/branches.
2136 CGF.EmitBranch(ContBlock);
2137 CGF.EmitBlock(ContBlock, true);
2139 void Exit(CodeGenFunction &CGF) override {
2140 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2143 } // anonymous namespace
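// Illustrative usage sketch (assumption): CommonActionTy brackets a region
// with an enter and an exit runtime call, e.g.
//   CommonActionTy Action(EnterFn, Args, ExitFn, Args, /*Conditional=*/true);
//   OpGen.setAction(Action);  // Enter() runs before the body, Exit() after.
//   emitInlinedDirective(CGF, Kind, OpGen);
//   Action.Done(CGF);         // Closes the omp_if.end block when Conditional.
// This is exactly the pattern used by the critical/master/masked emitters
// below.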
2145 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2146 StringRef CriticalName,
2147 const RegionCodeGenTy &CriticalOpGen,
2148 SourceLocation Loc, const Expr *Hint) {
2149 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2150 // CriticalOpGen();
2151 // __kmpc_end_critical(ident_t *, gtid, Lock);
2152 // Prepare arguments and build a call to __kmpc_critical
2153 if (!CGF.HaveInsertPoint())
2154 return;
2155 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2156 getCriticalRegionLock(CriticalName)};
2157 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2158 std::end(Args));
2159 if (Hint) {
2160 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2161 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2163 CommonActionTy Action(
2164 OMPBuilder.getOrCreateRuntimeFunction(
2165 CGM.getModule(),
2166 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2167 EnterArgs,
2168 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2169 OMPRTL___kmpc_end_critical),
2170 Args);
2171 CriticalOpGen.setAction(Action);
2172 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
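// Illustrative sketch (assumption): for
//   #pragma omp critical(mylock)
// the action above brackets the body with
//   call void @__kmpc_critical(ptr @loc, i32 %gtid, ptr @<lock>)
//   ... body ...
//   call void @__kmpc_end_critical(ptr @loc, i32 %gtid, ptr @<lock>)
// where <lock> is, roughly, the "gomp_critical_user_mylock.var" global from
// getCriticalRegionLock, and a hint clause switches the entry call to
// __kmpc_critical_with_hint with the extra i32 hint argument pushed above.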
2175 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2176 const RegionCodeGenTy &MasterOpGen,
2177 SourceLocation Loc) {
2178 if (!CGF.HaveInsertPoint())
2179 return;
2180 // if(__kmpc_master(ident_t *, gtid)) {
2181 // MasterOpGen();
2182 // __kmpc_end_master(ident_t *, gtid);
2183 // }
2184 // Prepare arguments and build a call to __kmpc_master
2185 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2186 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2187 CGM.getModule(), OMPRTL___kmpc_master),
2188 Args,
2189 OMPBuilder.getOrCreateRuntimeFunction(
2190 CGM.getModule(), OMPRTL___kmpc_end_master),
2191 Args,
2192 /*Conditional=*/true);
2193 MasterOpGen.setAction(Action);
2194 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2195 Action.Done(CGF);
2198 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2199 const RegionCodeGenTy &MaskedOpGen,
2200 SourceLocation Loc, const Expr *Filter) {
2201 if (!CGF.HaveInsertPoint())
2202 return;
2203 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2204 // MaskedOpGen();
2205 // __kmpc_end_masked(ident_t *, gtid);
2206 // }
2207 // Prepare arguments and build a call to __kmpc_masked
2208 llvm::Value *FilterVal = Filter
2209 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2210 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2211 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2212 FilterVal};
2213 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2214 getThreadID(CGF, Loc)};
2215 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2216 CGM.getModule(), OMPRTL___kmpc_masked),
2217 Args,
2218 OMPBuilder.getOrCreateRuntimeFunction(
2219 CGM.getModule(), OMPRTL___kmpc_end_masked),
2220 ArgsEnd,
2221 /*Conditional=*/true);
2222 MaskedOpGen.setAction(Action);
2223 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2224 Action.Done(CGF);
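// Illustrative note (assumption): with no filter clause FilterVal defaults to
// 0 above, so
//   #pragma omp masked            // behaves like filter(0): thread 0 only
//   #pragma omp masked filter(3)  // passes 3 to __kmpc_masked
// and in both cases only the thread for which __kmpc_masked returns nonzero
// executes the body.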
2227 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2228 SourceLocation Loc) {
2229 if (!CGF.HaveInsertPoint())
2230 return;
2231 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2232 OMPBuilder.createTaskyield(CGF.Builder);
2233 } else {
2234 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2235 llvm::Value *Args[] = {
2236 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2237 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2238 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2239 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2240 Args);
2243 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2244 Region->emitUntiedSwitch(CGF);
2247 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2248 const RegionCodeGenTy &TaskgroupOpGen,
2249 SourceLocation Loc) {
2250 if (!CGF.HaveInsertPoint())
2251 return;
2252 // __kmpc_taskgroup(ident_t *, gtid);
2253 // TaskgroupOpGen();
2254 // __kmpc_end_taskgroup(ident_t *, gtid);
2255 // Prepare arguments and build a call to __kmpc_taskgroup
2256 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2257 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2258 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2259 Args,
2260 OMPBuilder.getOrCreateRuntimeFunction(
2261 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2262 Args);
2263 TaskgroupOpGen.setAction(Action);
2264 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2267 /// Given an array of pointers to variables, project the address of a
2268 /// given variable.
2269 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2270 unsigned Index, const VarDecl *Var) {
2271 // Pull out the pointer to the variable.
2272 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2273 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2275 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2276 return Address(
2277 CGF.Builder.CreateBitCast(
2278 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2279 ElemTy, CGF.getContext().getDeclAlign(Var));
2282 static llvm::Value *emitCopyprivateCopyFunction(
2283 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2284 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2285 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2286 SourceLocation Loc) {
2287 ASTContext &C = CGM.getContext();
2288 // void copy_func(void *LHSArg, void *RHSArg);
2289 FunctionArgList Args;
2290 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2291 ImplicitParamDecl::Other);
2292 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2293 ImplicitParamDecl::Other);
2294 Args.push_back(&LHSArg);
2295 Args.push_back(&RHSArg);
2296 const auto &CGFI =
2297 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2298 std::string Name =
2299 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2300 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2301 llvm::GlobalValue::InternalLinkage, Name,
2302 &CGM.getModule());
2303 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2304 Fn->setDoesNotRecurse();
2305 CodeGenFunction CGF(CGM);
2306 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2307 // Dest = (void*[n])(LHSArg);
2308 // Src = (void*[n])(RHSArg);
2309 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2310 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2311 ArgsElemType->getPointerTo()),
2312 ArgsElemType, CGF.getPointerAlign());
2313 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2314 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2315 ArgsElemType->getPointerTo()),
2316 ArgsElemType, CGF.getPointerAlign());
2317 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2318 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2319 // ...
2320 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2321 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2322 const auto *DestVar =
2323 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2324 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2326 const auto *SrcVar =
2327 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2328 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2330 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2331 QualType Type = VD->getType();
2332 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2334 CGF.FinishFunction();
2335 return Fn;
2338 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2339 const RegionCodeGenTy &SingleOpGen,
2340 SourceLocation Loc,
2341 ArrayRef<const Expr *> CopyprivateVars,
2342 ArrayRef<const Expr *> SrcExprs,
2343 ArrayRef<const Expr *> DstExprs,
2344 ArrayRef<const Expr *> AssignmentOps) {
2345 if (!CGF.HaveInsertPoint())
2346 return;
2347 assert(CopyprivateVars.size() == SrcExprs.size() &&
2348 CopyprivateVars.size() == DstExprs.size() &&
2349 CopyprivateVars.size() == AssignmentOps.size());
2350 ASTContext &C = CGM.getContext();
2351 // int32 did_it = 0;
2352 // if(__kmpc_single(ident_t *, gtid)) {
2353 // SingleOpGen();
2354 // __kmpc_end_single(ident_t *, gtid);
2355 // did_it = 1;
2356 // }
2357 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2358 // <copy_func>, did_it);
2360 Address DidIt = Address::invalid();
2361 if (!CopyprivateVars.empty()) {
2362 // int32 did_it = 0;
2363 QualType KmpInt32Ty =
2364 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2365 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2366 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2368 // Prepare arguments and build a call to __kmpc_single
2369 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2370 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2371 CGM.getModule(), OMPRTL___kmpc_single),
2372 Args,
2373 OMPBuilder.getOrCreateRuntimeFunction(
2374 CGM.getModule(), OMPRTL___kmpc_end_single),
2375 Args,
2376 /*Conditional=*/true);
2377 SingleOpGen.setAction(Action);
2378 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2379 if (DidIt.isValid()) {
2380 // did_it = 1;
2381 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2383 Action.Done(CGF);
2384 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2385 // <copy_func>, did_it);
2386 if (DidIt.isValid()) {
2387 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2388 QualType CopyprivateArrayTy = C.getConstantArrayType(
2389 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2390 /*IndexTypeQuals=*/0);
2391 // Create a list of all private variables for copyprivate.
2392 Address CopyprivateList =
2393 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2394 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2395 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2396 CGF.Builder.CreateStore(
2397 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2398 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2399 CGF.VoidPtrTy),
2400 Elem);
2402 // Build the function that copies private values from the single region to
2403 // all other threads in the corresponding parallel region.
2404 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2405 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2406 SrcExprs, DstExprs, AssignmentOps, Loc);
2407 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2408 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2409 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2410 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2411 llvm::Value *Args[] = {
2412 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2413 getThreadID(CGF, Loc), // i32 <gtid>
2414 BufSize, // size_t <buf_size>
2415 CL.getPointer(), // void *<copyprivate list>
2416 CpyFn, // void (*) (void *, void *) <copy_func>
2417 DidItVal // i32 did_it
2419 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2420 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2421 Args);
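// Illustrative sketch, not upstream code: for
//   #pragma omp single copyprivate(x)
// the emitted pattern follows the comment above:
//   did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     body; __kmpc_end_single(&loc, gtid); did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, sizeof(void *[1]), cpr_list, copy_func,
//                      did_it);
// where copy_func is the helper built by emitCopyprivateCopyFunction that
// assigns every other thread's x from the executing thread's copy.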
2425 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2426 const RegionCodeGenTy &OrderedOpGen,
2427 SourceLocation Loc, bool IsThreads) {
2428 if (!CGF.HaveInsertPoint())
2429 return;
2430 // __kmpc_ordered(ident_t *, gtid);
2431 // OrderedOpGen();
2432 // __kmpc_end_ordered(ident_t *, gtid);
2433 // Prepare arguments and build a call to __kmpc_ordered
2434 if (IsThreads) {
2435 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2436 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2437 CGM.getModule(), OMPRTL___kmpc_ordered),
2438 Args,
2439 OMPBuilder.getOrCreateRuntimeFunction(
2440 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2441 Args);
2442 OrderedOpGen.setAction(Action);
2443 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2444 return;
2446 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2449 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2450 unsigned Flags;
2451 if (Kind == OMPD_for)
2452 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2453 else if (Kind == OMPD_sections)
2454 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2455 else if (Kind == OMPD_single)
2456 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2457 else if (Kind == OMPD_barrier)
2458 Flags = OMP_IDENT_BARRIER_EXPL;
2459 else
2460 Flags = OMP_IDENT_BARRIER_IMPL;
2461 return Flags;
2464 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2465 CodeGenFunction &CGF, const OMPLoopDirective &S,
2466 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2467 // Check if the loop directive is actually a doacross loop directive. In that
2468 // case, choose a static schedule with chunk size 1.
2469 if (llvm::any_of(
2470 S.getClausesOfKind<OMPOrderedClause>(),
2471 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2472 ScheduleKind = OMPC_SCHEDULE_static;
2473 // Chunk size is 1 in this case.
2474 llvm::APInt ChunkSize(32, 1);
2475 ChunkExpr = IntegerLiteral::Create(
2476 CGF.getContext(), ChunkSize,
2477 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2478 SourceLocation());
2482 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2483 OpenMPDirectiveKind Kind, bool EmitChecks,
2484 bool ForceSimpleCall) {
2485 // Check if we should use the OMPBuilder
2486 auto *OMPRegionInfo =
2487 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2488 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2489 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2490 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2491 return;
2494 if (!CGF.HaveInsertPoint())
2495 return;
2498 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2499 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2500 // thread_id);
2501 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2502 getThreadID(CGF, Loc)};
2503 if (OMPRegionInfo) {
2504 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2505 llvm::Value *Result = CGF.EmitRuntimeCall(
2506 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2507 OMPRTL___kmpc_cancel_barrier),
2508 Args);
2509 if (EmitChecks) {
2510 // if (__kmpc_cancel_barrier()) {
2511 // exit from construct;
2512 // }
2513 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2514 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2515 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2516 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2517 CGF.EmitBlock(ExitBB);
2518 // exit from construct;
2519 CodeGenFunction::JumpDest CancelDestination =
2520 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2521 CGF.EmitBranchThroughCleanup(CancelDestination);
2522 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2524 return;
2527 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2528 CGM.getModule(), OMPRTL___kmpc_barrier),
2529 Args);
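// Illustrative sketch (assumption): inside a cancellable worksharing region
// the implicit barrier becomes
//   %r = call i32 @__kmpc_cancel_barrier(ptr @loc, i32 %gtid)
//   %c = icmp ne i32 %r, 0
//   br i1 %c, label %.cancel.exit, label %.cancel.continue
// with .cancel.exit branching through cleanups out of the construct, while a
// region without cancellation gets the plain @__kmpc_barrier call.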
2532 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2533 Expr *ME, bool IsFatal) {
2534 llvm::Value *MVL =
2535 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2536 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2537 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2538 // *message)
2539 llvm::Value *Args[] = {
2540 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
2541 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2542 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2543 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2544 CGM.getModule(), OMPRTL___kmpc_error),
2545 Args);
2548 /// Map the OpenMP loop schedule to the runtime enumeration.
2549 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2550 bool Chunked, bool Ordered) {
2551 switch (ScheduleKind) {
2552 case OMPC_SCHEDULE_static:
2553 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2554 : (Ordered ? OMP_ord_static : OMP_sch_static);
2555 case OMPC_SCHEDULE_dynamic:
2556 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2557 case OMPC_SCHEDULE_guided:
2558 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2559 case OMPC_SCHEDULE_runtime:
2560 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2561 case OMPC_SCHEDULE_auto:
2562 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2563 case OMPC_SCHEDULE_unknown:
2564 assert(!Chunked && "chunk was specified but schedule kind not known");
2565 return Ordered ? OMP_ord_static : OMP_sch_static;
2567 llvm_unreachable("Unexpected runtime schedule");
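// Illustrative mapping, derived from the switch above:
//   schedule(static)        -> OMP_sch_static
//   schedule(static, 4)     -> OMP_sch_static_chunked
//   schedule(dynamic[, 4])  -> OMP_sch_dynamic_chunked
//   schedule(guided[, 4])   -> OMP_sch_guided_chunked
// with the OMP_ord_* counterparts selected when an ordered clause is present.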
2570 /// Map the OpenMP distribute schedule to the runtime enumeration.
2571 static OpenMPSchedType
2572 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2573 // Only 'static' is allowed for dist_schedule.
2574 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2577 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2578 bool Chunked) const {
2579 OpenMPSchedType Schedule =
2580 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2581 return Schedule == OMP_sch_static;
2584 bool CGOpenMPRuntime::isStaticNonchunked(
2585 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2586 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2587 return Schedule == OMP_dist_sch_static;
2590 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2591 bool Chunked) const {
2592 OpenMPSchedType Schedule =
2593 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2594 return Schedule == OMP_sch_static_chunked;
2597 bool CGOpenMPRuntime::isStaticChunked(
2598 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2599 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2600 return Schedule == OMP_dist_sch_static_chunked;
2603 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2604 OpenMPSchedType Schedule =
2605 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2606 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2607 return Schedule != OMP_sch_static;
2610 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2611 OpenMPScheduleClauseModifier M1,
2612 OpenMPScheduleClauseModifier M2) {
2613 int Modifier = 0;
2614 switch (M1) {
2615 case OMPC_SCHEDULE_MODIFIER_monotonic:
2616 Modifier = OMP_sch_modifier_monotonic;
2617 break;
2618 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2619 Modifier = OMP_sch_modifier_nonmonotonic;
2620 break;
2621 case OMPC_SCHEDULE_MODIFIER_simd:
2622 if (Schedule == OMP_sch_static_chunked)
2623 Schedule = OMP_sch_static_balanced_chunked;
2624 break;
2625 case OMPC_SCHEDULE_MODIFIER_last:
2626 case OMPC_SCHEDULE_MODIFIER_unknown:
2627 break;
2629 switch (M2) {
2630 case OMPC_SCHEDULE_MODIFIER_monotonic:
2631 Modifier = OMP_sch_modifier_monotonic;
2632 break;
2633 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2634 Modifier = OMP_sch_modifier_nonmonotonic;
2635 break;
2636 case OMPC_SCHEDULE_MODIFIER_simd:
2637 if (Schedule == OMP_sch_static_chunked)
2638 Schedule = OMP_sch_static_balanced_chunked;
2639 break;
2640 case OMPC_SCHEDULE_MODIFIER_last:
2641 case OMPC_SCHEDULE_MODIFIER_unknown:
2642 break;
2644 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2645 // If the static schedule kind is specified or if the ordered clause is
2646 // specified, and if the nonmonotonic modifier is not specified, the effect is
2647 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2648 // modifier is specified, the effect is as if the nonmonotonic modifier is
2649 // specified.
2650 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2651 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2652 Schedule == OMP_sch_static_balanced_chunked ||
2653 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2654 Schedule == OMP_dist_sch_static_chunked ||
2655 Schedule == OMP_dist_sch_static))
2656 Modifier = OMP_sch_modifier_nonmonotonic;
2658 return Schedule | Modifier;
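// Illustrative sketch (assumption): the modifier bits are OR'ed into the
// schedule enum, so under OpenMP >= 5.0 a plain
//   schedule(dynamic, 4)
// yields OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic as the
// kmp_int32 schedule value passed to __kmpc_dispatch_init below.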
2661 void CGOpenMPRuntime::emitForDispatchInit(
2662 CodeGenFunction &CGF, SourceLocation Loc,
2663 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2664 bool Ordered, const DispatchRTInput &DispatchValues) {
2665 if (!CGF.HaveInsertPoint())
2666 return;
2667 OpenMPSchedType Schedule = getRuntimeSchedule(
2668 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2669 assert(Ordered ||
2670 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2671 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2672 Schedule != OMP_sch_static_balanced_chunked));
2673 // Call __kmpc_dispatch_init(
2674 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2675 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2676 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2678 // If the Chunk was not specified in the clause, use the default value 1.
2679 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2680 : CGF.Builder.getIntN(IVSize, 1);
2681 llvm::Value *Args[] = {
2682 emitUpdateLocation(CGF, Loc),
2683 getThreadID(CGF, Loc),
2684 CGF.Builder.getInt32(addMonoNonMonoModifier(
2685 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2686 DispatchValues.LB, // Lower
2687 DispatchValues.UB, // Upper
2688 CGF.Builder.getIntN(IVSize, 1), // Stride
2689 Chunk // Chunk
2691 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2692 Args);
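// Illustrative sketch, not upstream code: a dynamically scheduled loop is
// driven by an init/next pair, roughly
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st))
//     for (i = lo; i <= hi; i += st) body(i);
// emitForNext below wraps the _next call and converts its i32 result to bool.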
2695 static void emitForStaticInitCall(
2696 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2697 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2698 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2699 const CGOpenMPRuntime::StaticRTInput &Values) {
2700 if (!CGF.HaveInsertPoint())
2701 return;
2703 assert(!Values.Ordered);
2704 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2705 Schedule == OMP_sch_static_balanced_chunked ||
2706 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2707 Schedule == OMP_dist_sch_static ||
2708 Schedule == OMP_dist_sch_static_chunked);
2710 // Call __kmpc_for_static_init(
2711 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2712 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2713 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2714 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2715 llvm::Value *Chunk = Values.Chunk;
2716 if (Chunk == nullptr) {
2717 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2718 Schedule == OMP_dist_sch_static) &&
2719 "expected static non-chunked schedule");
2720 // If the Chunk was not specified in the clause, use the default value 1.
2721 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2722 } else {
2723 assert((Schedule == OMP_sch_static_chunked ||
2724 Schedule == OMP_sch_static_balanced_chunked ||
2725 Schedule == OMP_ord_static_chunked ||
2726 Schedule == OMP_dist_sch_static_chunked) &&
2727 "expected static chunked schedule");
2729 llvm::Value *Args[] = {
2730 UpdateLocation,
2731 ThreadId,
2732 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2733 M2)), // Schedule type
2734 Values.IL.getPointer(), // &isLastIter
2735 Values.LB.getPointer(), // &LB
2736 Values.UB.getPointer(), // &UB
2737 Values.ST.getPointer(), // &Stride
2738 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2739 Chunk // Chunk
2741 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2744 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2745 SourceLocation Loc,
2746 OpenMPDirectiveKind DKind,
2747 const OpenMPScheduleTy &ScheduleKind,
2748 const StaticRTInput &Values) {
2749 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2750 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2751 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2752 "Expected loop-based or sections-based directive.");
2753 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2754 isOpenMPLoopDirective(DKind)
2755 ? OMP_IDENT_WORK_LOOP
2756 : OMP_IDENT_WORK_SECTIONS);
2757 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2758 llvm::FunctionCallee StaticInitFunction =
2759 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2760 false);
2761 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2762 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2763 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
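// Illustrative sketch (assumption): a statically scheduled worksharing loop
// brackets its body with
//   __kmpc_for_static_init_4(&loc, gtid, sched, &last, &lb, &ub, &st,
//                            /*incr=*/1, chunk);
//   for (i = lb; i <= ub; ++i) body(i);
//   __kmpc_for_static_fini(&loc, gtid);  // see emitForStaticFinish below
// each thread receiving its own [lb, ub] slice through the by-pointer
// arguments.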
2766 void CGOpenMPRuntime::emitDistributeStaticInit(
2767 CodeGenFunction &CGF, SourceLocation Loc,
2768 OpenMPDistScheduleClauseKind SchedKind,
2769 const CGOpenMPRuntime::StaticRTInput &Values) {
2770 OpenMPSchedType ScheduleNum =
2771 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2772 llvm::Value *UpdatedLocation =
2773 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2774 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2775 llvm::FunctionCallee StaticInitFunction;
2776 bool isGPUDistribute =
2777 CGM.getLangOpts().OpenMPIsTargetDevice &&
2778 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2779 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2780 Values.IVSize, Values.IVSigned, isGPUDistribute);
2782 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2783 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2784 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2787 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2788 SourceLocation Loc,
2789 OpenMPDirectiveKind DKind) {
2790 if (!CGF.HaveInsertPoint())
2791 return;
2792 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2793 llvm::Value *Args[] = {
2794 emitUpdateLocation(CGF, Loc,
2795 isOpenMPDistributeDirective(DKind)
2796 ? OMP_IDENT_WORK_DISTRIBUTE
2797 : isOpenMPLoopDirective(DKind)
2798 ? OMP_IDENT_WORK_LOOP
2799 : OMP_IDENT_WORK_SECTIONS),
2800 getThreadID(CGF, Loc)};
2801 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2802 if (isOpenMPDistributeDirective(DKind) &&
2803 CGM.getLangOpts().OpenMPIsTargetDevice &&
2804 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2805 CGF.EmitRuntimeCall(
2806 OMPBuilder.getOrCreateRuntimeFunction(
2807 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2808 Args);
2809 else
2810 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2811 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2812 Args);
2815 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2816 SourceLocation Loc,
2817 unsigned IVSize,
2818 bool IVSigned) {
2819 if (!CGF.HaveInsertPoint())
2820 return;
2821 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2822 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2823 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2824 Args);
2827 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2828 SourceLocation Loc, unsigned IVSize,
2829 bool IVSigned, Address IL,
2830 Address LB, Address UB,
2831 Address ST) {
2832 // Call __kmpc_dispatch_next(
2833 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2834 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2835 // kmp_int[32|64] *p_stride);
2836 llvm::Value *Args[] = {
2837 emitUpdateLocation(CGF, Loc),
2838 getThreadID(CGF, Loc),
2839 IL.getPointer(), // &isLastIter
2840 LB.getPointer(), // &Lower
2841 UB.getPointer(), // &Upper
2842 ST.getPointer() // &Stride
2843 };
2844 llvm::Value *Call = CGF.EmitRuntimeCall(
2845 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2846 return CGF.EmitScalarConversion(
2847 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2848 CGF.getContext().BoolTy, Loc);
2849 }
2851 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2852 llvm::Value *NumThreads,
2853 SourceLocation Loc) {
2854 if (!CGF.HaveInsertPoint())
2855 return;
2856 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2857 llvm::Value *Args[] = {
2858 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2859 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2860 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2861 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2862 Args);
2863 }
2865 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2866 ProcBindKind ProcBind,
2867 SourceLocation Loc) {
2868 if (!CGF.HaveInsertPoint())
2869 return;
2870 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2871 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2872 llvm::Value *Args[] = {
2873 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2874 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2875 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2876 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2877 Args);
2878 }
2880 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2881 SourceLocation Loc, llvm::AtomicOrdering AO) {
2882 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2883 OMPBuilder.createFlush(CGF.Builder);
2884 } else {
2885 if (!CGF.HaveInsertPoint())
2886 return;
2887 // Build call void __kmpc_flush(ident_t *loc)
2888 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2889 CGM.getModule(), OMPRTL___kmpc_flush),
2890 emitUpdateLocation(CGF, Loc));
2891 }
2892 }
2894 namespace {
2895 /// Indexes of fields for type kmp_task_t.
2896 enum KmpTaskTFields {
2897 /// List of shared variables.
2898 KmpTaskTShareds,
2899 /// Task routine.
2900 KmpTaskTRoutine,
2901 /// Partition id for the untied tasks.
2902 KmpTaskTPartId,
2903 /// Function with call of destructors for private variables.
2904 Data1,
2905 /// Task priority.
2906 Data2,
2907 /// (Taskloops only) Lower bound.
2908 KmpTaskTLowerBound,
2909 /// (Taskloops only) Upper bound.
2910 KmpTaskTUpperBound,
2911 /// (Taskloops only) Stride.
2912 KmpTaskTStride,
2913 /// (Taskloops only) Is last iteration flag.
2914 KmpTaskTLastIter,
2915 /// (Taskloops only) Reduction data.
2916 KmpTaskTReductions,
2917 };
2918 } // anonymous namespace
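// Note: the enumerator order above must match the field order built by
// createKmpTaskTRecordDecl() below; the indexes are used with
// std::next(field_begin(), <index>) throughout this file.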
2920 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2921 // If we are in simd mode or there are no entries, we don't need to do
2922 // anything.
2923 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2924 return;
2926 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2927 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2928 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2929 SourceLocation Loc;
2930 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2931 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2932 E = CGM.getContext().getSourceManager().fileinfo_end();
2933 I != E; ++I) {
2934 if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
2935 I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
2936 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2937 I->getFirst(), EntryInfo.Line, 1);
2938 break;
2939 }
2940 }
2941 }
2942 switch (Kind) {
2943 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2944 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2945 DiagnosticsEngine::Error, "Offloading entry for target region in "
2946 "%0 is incorrect: either the "
2947 "address or the ID is invalid.");
2948 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2949 } break;
2950 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2951 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2952 DiagnosticsEngine::Error, "Offloading entry for declare target "
2953 "variable %0 is incorrect: the "
2954 "address is invalid.");
2955 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2956 } break;
2957 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2958 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2959 DiagnosticsEngine::Error,
2960 "Offloading entry for declare target variable is incorrect: the "
2961 "address is invalid.");
2962 CGM.getDiags().Report(DiagID);
2963 } break;
2964 }
2965 };
2967 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2968 }
2970 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2971 if (!KmpRoutineEntryPtrTy) {
2972 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2973 ASTContext &C = CGM.getContext();
2974 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2975 FunctionProtoType::ExtProtoInfo EPI;
2976 KmpRoutineEntryPtrQTy = C.getPointerType(
2977 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2978 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2979 }
2980 }
2982 namespace {
2983 struct PrivateHelpersTy {
2984 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2985 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2986 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2987 PrivateElemInit(PrivateElemInit) {}
2988 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2989 const Expr *OriginalRef = nullptr;
2990 const VarDecl *Original = nullptr;
2991 const VarDecl *PrivateCopy = nullptr;
2992 const VarDecl *PrivateElemInit = nullptr;
2993 bool isLocalPrivate() const {
2994 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2995 }
2996 };
2997 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2998 } // anonymous namespace
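/// Returns true if \p VD is declared with an 'omp allocate' attribute that
/// requires a non-default allocation, i.e. anything other than the default
/// allocator with no allocator expression.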
3000 static bool isAllocatableDecl(const VarDecl *VD) {
3001 const VarDecl *CVD = VD->getCanonicalDecl();
3002 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3003 return false;
3004 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3005 // Use the default allocation if the default allocator is specified without an allocator expression.
3006 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3007 !AA->getAllocator());
3008 }
3010 static RecordDecl *
3011 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3012 if (!Privates.empty()) {
3013 ASTContext &C = CGM.getContext();
3014 // Build struct .kmp_privates_t. {
3015 // /* private vars */
3016 // };
3017 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3018 RD->startDefinition();
3019 for (const auto &Pair : Privates) {
3020 const VarDecl *VD = Pair.second.Original;
3021 QualType Type = VD->getType().getNonReferenceType();
3022 // If the private variable is a local variable with lvalue ref type,
3023 // allocate the pointer instead of the pointee type.
3024 if (Pair.second.isLocalPrivate()) {
3025 if (VD->getType()->isLValueReferenceType())
3026 Type = C.getPointerType(Type);
3027 if (isAllocatableDecl(VD))
3028 Type = C.getPointerType(Type);
3029 }
3030 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3031 if (VD->hasAttrs()) {
3032 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3033 E(VD->getAttrs().end());
3034 I != E; ++I)
3035 FD->addAttr(*I);
3036 }
3037 }
3038 RD->completeDefinition();
3039 return RD;
3041 return nullptr;
3042 }
3044 static RecordDecl *
3045 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3046 QualType KmpInt32Ty,
3047 QualType KmpRoutineEntryPointerQTy) {
3048 ASTContext &C = CGM.getContext();
3049 // Build struct kmp_task_t {
3050 // void * shareds;
3051 // kmp_routine_entry_t routine;
3052 // kmp_int32 part_id;
3053 // kmp_cmplrdata_t data1;
3054 // kmp_cmplrdata_t data2;
3055 // For taskloops additional fields:
3056 // kmp_uint64 lb;
3057 // kmp_uint64 ub;
3058 // kmp_int64 st;
3059 // kmp_int32 liter;
3060 // void * reductions;
3061 // };
3062 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3063 UD->startDefinition();
3064 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3065 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3066 UD->completeDefinition();
3067 QualType KmpCmplrdataTy = C.getRecordType(UD);
3068 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3069 RD->startDefinition();
3070 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3071 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3072 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3073 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3074 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3075 if (isOpenMPTaskLoopDirective(Kind)) {
3076 QualType KmpUInt64Ty =
3077 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3078 QualType KmpInt64Ty =
3079 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3080 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3081 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3082 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3083 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3084 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3085 }
3086 RD->completeDefinition();
3087 return RD;
3088 }
3090 static RecordDecl *
3091 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3092 ArrayRef<PrivateDataTy> Privates) {
3093 ASTContext &C = CGM.getContext();
3094 // Build struct kmp_task_t_with_privates {
3095 // kmp_task_t task_data;
3096 // .kmp_privates_t. privates;
3097 // };
3098 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3099 RD->startDefinition();
3100 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3101 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3102 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3103 RD->completeDefinition();
3104 return RD;
3105 }
3107 /// Emit a proxy function which accepts kmp_task_t as the second
3108 /// argument.
3109 /// \code
3110 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3111 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3112 /// For taskloops:
3113 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3114 /// tt->reductions, tt->shareds);
3115 /// return 0;
3116 /// }
3117 /// \endcode
3118 static llvm::Function *
3119 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3120 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3121 QualType KmpTaskTWithPrivatesPtrQTy,
3122 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3123 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3124 llvm::Value *TaskPrivatesMap) {
3125 ASTContext &C = CGM.getContext();
3126 FunctionArgList Args;
3127 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3128 ImplicitParamDecl::Other);
3129 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3130 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3131 ImplicitParamDecl::Other);
3132 Args.push_back(&GtidArg);
3133 Args.push_back(&TaskTypeArg);
3134 const auto &TaskEntryFnInfo =
3135 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3136 llvm::FunctionType *TaskEntryTy =
3137 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3138 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3139 auto *TaskEntry = llvm::Function::Create(
3140 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3141 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3142 TaskEntry->setDoesNotRecurse();
3143 CodeGenFunction CGF(CGM);
3144 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3145 Loc, Loc);
3147 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3148 // tt,
3149 // For taskloops:
3150 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3151 // tt->task_data.shareds);
3152 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3153 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3154 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3155 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3156 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3157 const auto *KmpTaskTWithPrivatesQTyRD =
3158 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3159 LValue Base =
3160 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3161 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3162 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3163 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3164 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3166 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3167 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3168 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3169 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3170 CGF.ConvertTypeForMem(SharedsPtrTy));
3172 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3173 llvm::Value *PrivatesParam;
3174 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3175 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3176 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3177 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3178 } else {
3179 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3180 }
3182 llvm::Value *CommonArgs[] = {
3183 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3184 CGF.Builder
3185 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3186 CGF.VoidPtrTy, CGF.Int8Ty)
3187 .getPointer()};
3188 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3189 std::end(CommonArgs));
3190 if (isOpenMPTaskLoopDirective(Kind)) {
3191 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3192 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3193 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3194 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3195 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3196 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3197 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3198 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3199 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3200 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3201 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3202 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3203 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3204 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3205 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3206 CallArgs.push_back(LBParam);
3207 CallArgs.push_back(UBParam);
3208 CallArgs.push_back(StParam);
3209 CallArgs.push_back(LIParam);
3210 CallArgs.push_back(RParam);
3211 }
3212 CallArgs.push_back(SharedsParam);
3214 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3215 CallArgs);
3216 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3217 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3218 CGF.FinishFunction();
3219 return TaskEntry;
3220 }
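/// Emit a function that runs the destructors for every destructible field of
/// the task's privates record; a rough sketch of its shape:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   // destroy each destructible field of tt->privates
/// }
/// \endcode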
3222 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3223 SourceLocation Loc,
3224 QualType KmpInt32Ty,
3225 QualType KmpTaskTWithPrivatesPtrQTy,
3226 QualType KmpTaskTWithPrivatesQTy) {
3227 ASTContext &C = CGM.getContext();
3228 FunctionArgList Args;
3229 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3230 ImplicitParamDecl::Other);
3231 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3232 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3233 ImplicitParamDecl::Other);
3234 Args.push_back(&GtidArg);
3235 Args.push_back(&TaskTypeArg);
3236 const auto &DestructorFnInfo =
3237 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3238 llvm::FunctionType *DestructorFnTy =
3239 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3240 std::string Name =
3241 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3242 auto *DestructorFn =
3243 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3244 Name, &CGM.getModule());
3245 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3246 DestructorFnInfo);
3247 DestructorFn->setDoesNotRecurse();
3248 CodeGenFunction CGF(CGM);
3249 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3250 Args, Loc, Loc);
3252 LValue Base = CGF.EmitLoadOfPointerLValue(
3253 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3254 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3255 const auto *KmpTaskTWithPrivatesQTyRD =
3256 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3257 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3258 Base = CGF.EmitLValueForField(Base, *FI);
3259 for (const auto *Field :
3260 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3261 if (QualType::DestructionKind DtorKind =
3262 Field->getType().isDestructedType()) {
3263 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3264 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3265 }
3266 }
3267 CGF.FinishFunction();
3268 return DestructorFn;
3269 }
3271 /// Emit a privates mapping function for correct handling of private and
3272 /// firstprivate variables.
3273 /// \code
3274 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3275 /// **noalias priv1,..., <tyn> **noalias privn) {
3276 /// *priv1 = &.privates.priv1;
3277 /// ...;
3278 /// *privn = &.privates.privn;
3279 /// }
3280 /// \endcode
3281 static llvm::Value *
3282 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3283 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3284 ArrayRef<PrivateDataTy> Privates) {
3285 ASTContext &C = CGM.getContext();
3286 FunctionArgList Args;
3287 ImplicitParamDecl TaskPrivatesArg(
3288 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3289 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3290 ImplicitParamDecl::Other);
3291 Args.push_back(&TaskPrivatesArg);
3292 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3293 unsigned Counter = 1;
3294 for (const Expr *E : Data.PrivateVars) {
3295 Args.push_back(ImplicitParamDecl::Create(
3296 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3297 C.getPointerType(C.getPointerType(E->getType()))
3298 .withConst()
3299 .withRestrict(),
3300 ImplicitParamDecl::Other));
3301 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3302 PrivateVarsPos[VD] = Counter;
3303 ++Counter;
3304 }
3305 for (const Expr *E : Data.FirstprivateVars) {
3306 Args.push_back(ImplicitParamDecl::Create(
3307 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3308 C.getPointerType(C.getPointerType(E->getType()))
3309 .withConst()
3310 .withRestrict(),
3311 ImplicitParamDecl::Other));
3312 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3313 PrivateVarsPos[VD] = Counter;
3314 ++Counter;
3315 }
3316 for (const Expr *E : Data.LastprivateVars) {
3317 Args.push_back(ImplicitParamDecl::Create(
3318 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3319 C.getPointerType(C.getPointerType(E->getType()))
3320 .withConst()
3321 .withRestrict(),
3322 ImplicitParamDecl::Other));
3323 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3324 PrivateVarsPos[VD] = Counter;
3325 ++Counter;
3326 }
3327 for (const VarDecl *VD : Data.PrivateLocals) {
3328 QualType Ty = VD->getType().getNonReferenceType();
3329 if (VD->getType()->isLValueReferenceType())
3330 Ty = C.getPointerType(Ty);
3331 if (isAllocatableDecl(VD))
3332 Ty = C.getPointerType(Ty);
3333 Args.push_back(ImplicitParamDecl::Create(
3334 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3335 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3336 ImplicitParamDecl::Other));
3337 PrivateVarsPos[VD] = Counter;
3338 ++Counter;
3339 }
3340 const auto &TaskPrivatesMapFnInfo =
3341 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3342 llvm::FunctionType *TaskPrivatesMapTy =
3343 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3344 std::string Name =
3345 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3346 auto *TaskPrivatesMap = llvm::Function::Create(
3347 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3348 &CGM.getModule());
3349 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3350 TaskPrivatesMapFnInfo);
3351 if (CGM.getLangOpts().Optimize) {
3352 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3353 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3354 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3355 }
3356 CodeGenFunction CGF(CGM);
3357 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3358 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3360 // *privi = &.privates.privi;
3361 LValue Base = CGF.EmitLoadOfPointerLValue(
3362 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3363 TaskPrivatesArg.getType()->castAs<PointerType>());
3364 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3365 Counter = 0;
3366 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3367 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3368 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3369 LValue RefLVal =
3370 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3371 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3372 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3373 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3374 ++Counter;
3375 }
3376 CGF.FinishFunction();
3377 return TaskPrivatesMap;
3378 }
3380 /// Emit initialization for private variables in task-based directives.
3381 static void emitPrivatesInit(CodeGenFunction &CGF,
3382 const OMPExecutableDirective &D,
3383 Address KmpTaskSharedsPtr, LValue TDBase,
3384 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3385 QualType SharedsTy, QualType SharedsPtrTy,
3386 const OMPTaskDataTy &Data,
3387 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3388 ASTContext &C = CGF.getContext();
3389 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3390 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3391 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3392 ? OMPD_taskloop
3393 : OMPD_task;
3394 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3395 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3396 LValue SrcBase;
3397 bool IsTargetTask =
3398 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3399 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3400 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3401 // PointersArray, SizesArray, and MappersArray. The original variables for
3402 // these arrays are not captured and we get their addresses explicitly.
3403 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3404 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3405 SrcBase = CGF.MakeAddrLValue(
3406 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3407 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3408 CGF.ConvertTypeForMem(SharedsTy)),
3409 SharedsTy);
3410 }
3411 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3412 for (const PrivateDataTy &Pair : Privates) {
3413 // Do not initialize private locals.
3414 if (Pair.second.isLocalPrivate()) {
3415 ++FI;
3416 continue;
3417 }
3418 const VarDecl *VD = Pair.second.PrivateCopy;
3419 const Expr *Init = VD->getAnyInitializer();
3420 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3421 !CGF.isTrivialInitializer(Init)))) {
3422 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3423 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3424 const VarDecl *OriginalVD = Pair.second.Original;
3425 // Check if the variable is the target-based BasePointersArray,
3426 // PointersArray, SizesArray, or MappersArray.
3427 LValue SharedRefLValue;
3428 QualType Type = PrivateLValue.getType();
3429 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3430 if (IsTargetTask && !SharedField) {
3431 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3432 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3433 cast<CapturedDecl>(OriginalVD->getDeclContext())
3434 ->getNumParams() == 0 &&
3435 isa<TranslationUnitDecl>(
3436 cast<CapturedDecl>(OriginalVD->getDeclContext())
3437 ->getDeclContext()) &&
3438 "Expected artificial target data variable.");
3439 SharedRefLValue =
3440 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3441 } else if (ForDup) {
3442 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3443 SharedRefLValue = CGF.MakeAddrLValue(
3444 SharedRefLValue.getAddress(CGF).withAlignment(
3445 C.getDeclAlign(OriginalVD)),
3446 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3447 SharedRefLValue.getTBAAInfo());
3448 } else if (CGF.LambdaCaptureFields.count(
3449 Pair.second.Original->getCanonicalDecl()) > 0 ||
3450 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3451 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3452 } else {
3453 // Processing for implicitly captured variables.
3454 InlinedOpenMPRegionRAII Region(
3455 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3456 /*HasCancel=*/false, /*NoInheritance=*/true);
3457 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3459 if (Type->isArrayType()) {
3460 // Initialize firstprivate array.
3461 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3462 // Perform simple memcpy.
3463 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3464 } else {
3465 // Initialize firstprivate array using element-by-element
3466 // initialization.
3467 CGF.EmitOMPAggregateAssign(
3468 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3469 Type,
3470 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3471 Address SrcElement) {
3472 // Clean up any temporaries needed by the initialization.
3473 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3474 InitScope.addPrivate(Elem, SrcElement);
3475 (void)InitScope.Privatize();
3476 // Emit initialization for single element.
3477 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3478 CGF, &CapturesInfo);
3479 CGF.EmitAnyExprToMem(Init, DestElement,
3480 Init->getType().getQualifiers(),
3481 /*IsInitializer=*/false);
3482 });
3483 }
3484 } else {
3485 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3486 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3487 (void)InitScope.Privatize();
3488 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3489 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3490 /*capturedByInit=*/false);
3491 }
3492 } else {
3493 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3494 }
3495 }
3496 ++FI;
3497 }
3498 }
3500 /// Check if duplication function is required for taskloops.
3501 static bool checkInitIsRequired(CodeGenFunction &CGF,
3502 ArrayRef<PrivateDataTy> Privates) {
3503 bool InitRequired = false;
3504 for (const PrivateDataTy &Pair : Privates) {
3505 if (Pair.second.isLocalPrivate())
3506 continue;
3507 const VarDecl *VD = Pair.second.PrivateCopy;
3508 const Expr *Init = VD->getAnyInitializer();
3509 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3510 !CGF.isTrivialInitializer(Init));
3511 if (InitRequired)
3512 break;
3513 }
3514 return InitRequired;
3515 }
3518 /// Emit task_dup function (for initialization of
3519 /// private/firstprivate/lastprivate vars and last_iter flag)
3520 /// \code
3521 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3522 /// lastpriv) {
3523 /// // setup lastprivate flag
3524 /// task_dst->last = lastpriv;
3525 /// // could be constructor calls here...
3526 /// }
3527 /// \endcode
3528 static llvm::Value *
3529 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3530 const OMPExecutableDirective &D,
3531 QualType KmpTaskTWithPrivatesPtrQTy,
3532 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3533 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3534 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3535 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3536 ASTContext &C = CGM.getContext();
3537 FunctionArgList Args;
3538 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3539 KmpTaskTWithPrivatesPtrQTy,
3540 ImplicitParamDecl::Other);
3541 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3542 KmpTaskTWithPrivatesPtrQTy,
3543 ImplicitParamDecl::Other);
3544 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3545 ImplicitParamDecl::Other);
3546 Args.push_back(&DstArg);
3547 Args.push_back(&SrcArg);
3548 Args.push_back(&LastprivArg);
3549 const auto &TaskDupFnInfo =
3550 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3551 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3552 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3553 auto *TaskDup = llvm::Function::Create(
3554 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3555 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3556 TaskDup->setDoesNotRecurse();
3557 CodeGenFunction CGF(CGM);
3558 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3559 Loc);
3561 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3562 CGF.GetAddrOfLocalVar(&DstArg),
3563 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3564 // task_dst->liter = lastpriv;
3565 if (WithLastIter) {
3566 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3567 LValue Base = CGF.EmitLValueForField(
3568 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3569 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3570 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3571 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3572 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3573 }
3575 // Emit initial values for private copies (if any).
3576 assert(!Privates.empty());
3577 Address KmpTaskSharedsPtr = Address::invalid();
3578 if (!Data.FirstprivateVars.empty()) {
3579 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3580 CGF.GetAddrOfLocalVar(&SrcArg),
3581 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3582 LValue Base = CGF.EmitLValueForField(
3583 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3584 KmpTaskSharedsPtr = Address(
3585 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3586 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3587 KmpTaskTShareds)),
3588 Loc),
3589 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3590 }
3591 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3592 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3593 CGF.FinishFunction();
3594 return TaskDup;
3595 }
3597 /// Checks if destructor function is required to be generated.
3598 /// \return true if cleanups are required, false otherwise.
3599 static bool
3600 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3601 ArrayRef<PrivateDataTy> Privates) {
3602 for (const PrivateDataTy &P : Privates) {
3603 if (P.second.isLocalPrivate())
3604 continue;
3605 QualType Ty = P.second.Original->getType().getNonReferenceType();
3606 if (Ty.isDestructedType())
3607 return true;
3608 }
3609 return false;
3610 }
3612 namespace {
3613 /// Loop generator for OpenMP iterator expression.
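/// For each iterator it emits, roughly, the following loop (a sketch; the
/// back-branch and the exit block are emitted by the destructor):
/// \code
/// counter = 0;
/// cont:
///   if (counter < upper) goto body; else goto exit;
/// body:
///   iter = begin + counter * step;
///   <nested code>
///   counter += 1; goto cont;
/// exit:
/// \endcode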
3614 class OMPIteratorGeneratorScope final
3615 : public CodeGenFunction::OMPPrivateScope {
3616 CodeGenFunction &CGF;
3617 const OMPIteratorExpr *E = nullptr;
3618 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3619 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3620 OMPIteratorGeneratorScope() = delete;
3621 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3623 public:
3624 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3625 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3626 if (!E)
3627 return;
3628 SmallVector<llvm::Value *, 4> Uppers;
3629 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3630 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3631 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3632 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3633 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3634 addPrivate(
3635 HelperData.CounterVD,
3636 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3637 }
3638 Privatize();
3640 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3641 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3642 LValue CLVal =
3643 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3644 HelperData.CounterVD->getType());
3645 // Counter = 0;
3646 CGF.EmitStoreOfScalar(
3647 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3648 CLVal);
3649 CodeGenFunction::JumpDest &ContDest =
3650 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3651 CodeGenFunction::JumpDest &ExitDest =
3652 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3653 // N = <number-of-iterations>;
3654 llvm::Value *N = Uppers[I];
3655 // cont:
3656 // if (Counter < N) goto body; else goto exit;
3657 CGF.EmitBlock(ContDest.getBlock());
3658 auto *CVal =
3659 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3660 llvm::Value *Cmp =
3661 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3662 ? CGF.Builder.CreateICmpSLT(CVal, N)
3663 : CGF.Builder.CreateICmpULT(CVal, N);
3664 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3665 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3666 // body:
3667 CGF.EmitBlock(BodyBB);
3668 // Iteri = Begini + Counter * Stepi;
3669 CGF.EmitIgnoredExpr(HelperData.Update);
3670 }
3671 }
3672 ~OMPIteratorGeneratorScope() {
3673 if (!E)
3674 return;
3675 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3676 // Counter = Counter + 1;
3677 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3678 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3679 // goto cont;
3680 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3681 // exit:
3682 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3683 }
3684 }
3685 };
3686 } // namespace
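/// Returns the base address and the size (in bytes) of \p E: the element size
/// multiplied by all dimensions for array shaping expressions, the distance
/// from the lower bound to one past the upper bound for array sections, and
/// sizeof(type) otherwise.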
3688 static std::pair<llvm::Value *, llvm::Value *>
3689 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3690 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3691 llvm::Value *Addr;
3692 if (OASE) {
3693 const Expr *Base = OASE->getBase();
3694 Addr = CGF.EmitScalarExpr(Base);
3695 } else {
3696 Addr = CGF.EmitLValue(E).getPointer(CGF);
3697 }
3698 llvm::Value *SizeVal;
3699 QualType Ty = E->getType();
3700 if (OASE) {
3701 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3702 for (const Expr *SE : OASE->getDimensions()) {
3703 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3704 Sz = CGF.EmitScalarConversion(
3705 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3706 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3707 }
3708 } else if (const auto *ASE =
3709 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3710 LValue UpAddrLVal =
3711 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3712 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3713 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3714 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3715 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3716 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3717 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3718 } else {
3719 SizeVal = CGF.getTypeSize(Ty);
3721 return std::make_pair(Addr, SizeVal);
3722 }
3724 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3725 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3726 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3727 if (KmpTaskAffinityInfoTy.isNull()) {
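// Build struct kmp_task_affinity_info_t {
//   intptr_t base_addr;
//   size_t len;
//   uint32_t flags; // built as a 32-bit unsigned integer type
// };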
3728 RecordDecl *KmpAffinityInfoRD =
3729 C.buildImplicitRecord("kmp_task_affinity_info_t");
3730 KmpAffinityInfoRD->startDefinition();
3731 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3732 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3733 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3734 KmpAffinityInfoRD->completeDefinition();
3735 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3736 }
3737 }
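/// Emits the task object setup for a task-based directive: allocates the task
/// via __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc for 'nowait'
/// target tasks), copies the shared variables, initializes the private
/// copies, and records the destructor and priority data.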
3739 CGOpenMPRuntime::TaskResultTy
3740 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3741 const OMPExecutableDirective &D,
3742 llvm::Function *TaskFunction, QualType SharedsTy,
3743 Address Shareds, const OMPTaskDataTy &Data) {
3744 ASTContext &C = CGM.getContext();
3745 llvm::SmallVector<PrivateDataTy, 4> Privates;
3746 // Aggregate privates and sort them by alignment in descending order.
3747 const auto *I = Data.PrivateCopies.begin();
3748 for (const Expr *E : Data.PrivateVars) {
3749 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3750 Privates.emplace_back(
3751 C.getDeclAlign(VD),
3752 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3753 /*PrivateElemInit=*/nullptr));
3754 ++I;
3755 }
3756 I = Data.FirstprivateCopies.begin();
3757 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3758 for (const Expr *E : Data.FirstprivateVars) {
3759 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3760 Privates.emplace_back(
3761 C.getDeclAlign(VD),
3762 PrivateHelpersTy(
3763 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3764 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3765 ++I;
3766 ++IElemInitRef;
3767 }
3768 I = Data.LastprivateCopies.begin();
3769 for (const Expr *E : Data.LastprivateVars) {
3770 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3771 Privates.emplace_back(
3772 C.getDeclAlign(VD),
3773 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3774 /*PrivateElemInit=*/nullptr));
3775 ++I;
3776 }
3777 for (const VarDecl *VD : Data.PrivateLocals) {
3778 if (isAllocatableDecl(VD))
3779 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3780 else
3781 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3782 }
3783 llvm::stable_sort(Privates,
3784 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3785 return L.first > R.first;
3786 });
3787 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3788 // Build type kmp_routine_entry_t (if not built yet).
3789 emitKmpRoutineEntryT(KmpInt32Ty);
3790 // Build type kmp_task_t (if not built yet).
3791 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3792 if (SavedKmpTaskloopTQTy.isNull()) {
3793 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3794 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3795 }
3796 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3797 } else {
3798 assert((D.getDirectiveKind() == OMPD_task ||
3799 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3800 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3801 "Expected taskloop, task or target directive");
3802 if (SavedKmpTaskTQTy.isNull()) {
3803 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3804 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3805 }
3806 KmpTaskTQTy = SavedKmpTaskTQTy;
3807 }
3808 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3809 // Build particular struct kmp_task_t for the given task.
3810 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3811 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3812 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3813 QualType KmpTaskTWithPrivatesPtrQTy =
3814 C.getPointerType(KmpTaskTWithPrivatesQTy);
3815 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3816 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3817 KmpTaskTWithPrivatesTy->getPointerTo();
3818 llvm::Value *KmpTaskTWithPrivatesTySize =
3819 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3820 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3822 // Emit initial values for private copies (if any).
3823 llvm::Value *TaskPrivatesMap = nullptr;
3824 llvm::Type *TaskPrivatesMapTy =
3825 std::next(TaskFunction->arg_begin(), 3)->getType();
3826 if (!Privates.empty()) {
3827 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3828 TaskPrivatesMap =
3829 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3830 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3831 TaskPrivatesMap, TaskPrivatesMapTy);
3832 } else {
3833 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3834 cast<llvm::PointerType>(TaskPrivatesMapTy));
3835 }
3836 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3837 // kmp_task_t *tt);
3838 llvm::Function *TaskEntry = emitProxyTaskFunction(
3839 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3840 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3841 TaskPrivatesMap);
3843 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3844 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3845 // kmp_routine_entry_t *task_entry);
3846 // Task flags. Format is taken from
3847 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3848 // description of kmp_tasking_flags struct.
3849 enum {
3850 TiedFlag = 0x1,
3851 FinalFlag = 0x2,
3852 DestructorsFlag = 0x8,
3853 PriorityFlag = 0x20,
3854 DetachableFlag = 0x40,
3855 };
3856 unsigned Flags = Data.Tied ? TiedFlag : 0;
3857 bool NeedsCleanup = false;
3858 if (!Privates.empty()) {
3859 NeedsCleanup =
3860 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3861 if (NeedsCleanup)
3862 Flags = Flags | DestructorsFlag;
3863 }
3864 if (Data.Priority.getInt())
3865 Flags = Flags | PriorityFlag;
3866 if (D.hasClausesOfKind<OMPDetachClause>())
3867 Flags = Flags | DetachableFlag;
3868 llvm::Value *TaskFlags =
3869 Data.Final.getPointer()
3870 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3871 CGF.Builder.getInt32(FinalFlag),
3872 CGF.Builder.getInt32(/*C=*/0))
3873 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3874 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3875 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3876 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3877 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3878 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3879 TaskEntry, KmpRoutineEntryPtrTy)};
3880 llvm::Value *NewTask;
3881 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3882 // Check if we have any device clause associated with the directive.
3883 const Expr *Device = nullptr;
3884 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3885 Device = C->getDevice();
3886 // Emit the device ID if any, otherwise use the default value.
3887 llvm::Value *DeviceID;
3888 if (Device)
3889 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3890 CGF.Int64Ty, /*isSigned=*/true);
3891 else
3892 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3893 AllocArgs.push_back(DeviceID);
3894 NewTask = CGF.EmitRuntimeCall(
3895 OMPBuilder.getOrCreateRuntimeFunction(
3896 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3897 AllocArgs);
3898 } else {
3899 NewTask =
3900 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3901 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3902 AllocArgs);
3903 }
3904 // Emit detach clause initialization.
3905 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3906 // task_descriptor);
3907 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3908 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3909 LValue EvtLVal = CGF.EmitLValue(Evt);
3911 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3912 // int gtid, kmp_task_t *task);
3913 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3914 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3915 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3916 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3917 OMPBuilder.getOrCreateRuntimeFunction(
3918 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3919 {Loc, Tid, NewTask});
3920 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3921 Evt->getExprLoc());
3922 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3923 }
3924 // Process affinity clauses.
3925 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3926 // Process list of affinity data.
3927 ASTContext &C = CGM.getContext();
3928 Address AffinitiesArray = Address::invalid();
3929 // Calculate number of elements to form the array of affinity data.
3930 llvm::Value *NumOfElements = nullptr;
3931 unsigned NumAffinities = 0;
3932 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3933 if (const Expr *Modifier = C->getModifier()) {
3934 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3935 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3936 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3937 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3938 NumOfElements =
3939 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3940 }
3941 } else {
3942 NumAffinities += C->varlist_size();
3943 }
3944 }
3945 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3946 // Fields ids in kmp_task_affinity_info record.
3947 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3949 QualType KmpTaskAffinityInfoArrayTy;
3950 if (NumOfElements) {
3951 NumOfElements = CGF.Builder.CreateNUWAdd(
3952 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3953 auto *OVE = new (C) OpaqueValueExpr(
3954 Loc,
3955 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3956 VK_PRValue);
3957 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3958 RValue::get(NumOfElements));
3959 KmpTaskAffinityInfoArrayTy =
3960 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
3961 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3962 // Properly emit variable-sized array.
3963 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3964 ImplicitParamDecl::Other);
3965 CGF.EmitVarDecl(*PD);
3966 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3967 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3968 /*isSigned=*/false);
3969 } else {
3970 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3971 KmpTaskAffinityInfoTy,
3972 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3973 ArrayType::Normal, /*IndexTypeQuals=*/0);
3974 AffinitiesArray =
3975 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3976 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3977 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3978 /*isSigned=*/false);
3979 }
3981 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3982 // Fill the array with the affinity items that have no iterator modifier.
3983 unsigned Pos = 0;
3984 bool HasIterator = false;
3985 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3986 if (C->getModifier()) {
3987 HasIterator = true;
3988 continue;
3989 }
3990 for (const Expr *E : C->varlists()) {
3991 llvm::Value *Addr;
3992 llvm::Value *Size;
3993 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3994 LValue Base =
3995 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3996 KmpTaskAffinityInfoTy);
3997 // affs[i].base_addr = &<Affinities[i].second>;
3998 LValue BaseAddrLVal = CGF.EmitLValueForField(
3999 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4000 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4001 BaseAddrLVal);
4002 // affs[i].len = sizeof(<Affinities[i].second>);
4003 LValue LenLVal = CGF.EmitLValueForField(
4004 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4005 CGF.EmitStoreOfScalar(Size, LenLVal);
4006 ++Pos;
4007 }
4008 }
4009 LValue PosLVal;
4010 if (HasIterator) {
4011 PosLVal = CGF.MakeAddrLValue(
4012 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4013 C.getSizeType());
4014 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4015 }
4016 // Process elements with iterators.
4017 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4018 const Expr *Modifier = C->getModifier();
4019 if (!Modifier)
4020 continue;
4021 OMPIteratorGeneratorScope IteratorScope(
4022 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4023 for (const Expr *E : C->varlists()) {
4024 llvm::Value *Addr;
4025 llvm::Value *Size;
4026 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4027 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4028 LValue Base = CGF.MakeAddrLValue(
4029 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4030 // affs[i].base_addr = &<Affinities[i].second>;
4031 LValue BaseAddrLVal = CGF.EmitLValueForField(
4032 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4033 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4034 BaseAddrLVal);
4035 // affs[i].len = sizeof(<Affinities[i].second>);
4036 LValue LenLVal = CGF.EmitLValueForField(
4037 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4038 CGF.EmitStoreOfScalar(Size, LenLVal);
4039 Idx = CGF.Builder.CreateNUWAdd(
4040 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4041 CGF.EmitStoreOfScalar(Idx, PosLVal);
4042 }
4043 }
4044 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4045 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4046 // naffins, kmp_task_affinity_info_t *affin_list);
4047 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4048 llvm::Value *GTid = getThreadID(CGF, Loc);
4049 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4050 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4051 // FIXME: Emit the function and ignore its result for now until the
4052 // runtime function is properly implemented.
4053 (void)CGF.EmitRuntimeCall(
4054 OMPBuilder.getOrCreateRuntimeFunction(
4055 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4056 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4057 }
4058 llvm::Value *NewTaskNewTaskTTy =
4059 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4060 NewTask, KmpTaskTWithPrivatesPtrTy);
4061 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4062 KmpTaskTWithPrivatesQTy);
4063 LValue TDBase =
4064 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4065 // Fill the data in the resulting kmp_task_t record.
4066 // Copy shareds if there are any.
4067 Address KmpTaskSharedsPtr = Address::invalid();
4068 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4069 KmpTaskSharedsPtr = Address(
4070 CGF.EmitLoadOfScalar(
4071 CGF.EmitLValueForField(
4072 TDBase,
4073 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4074 Loc),
4075 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4076 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4077 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4078 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4079 }
4080 // Emit initial values for private copies (if any).
4081 TaskResultTy Result;
4082 if (!Privates.empty()) {
4083 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4084 SharedsTy, SharedsPtrTy, Data, Privates,
4085 /*ForDup=*/false);
4086 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4087 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4088 Result.TaskDupFn = emitTaskDupFunction(
4089 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4090 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4091 /*WithLastIter=*/!Data.LastprivateVars.empty());
4092 }
4093 }
4094 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4095 enum { Priority = 0, Destructors = 1 };
4096 // Provide pointer to function with destructors for privates.
4097 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4098 const RecordDecl *KmpCmplrdataUD =
4099 (*FI)->getType()->getAsUnionType()->getDecl();
4100 if (NeedsCleanup) {
4101 llvm::Value *DestructorFn = emitDestructorsFunction(
4102 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4103 KmpTaskTWithPrivatesQTy);
4104 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4105 LValue DestructorsLV = CGF.EmitLValueForField(
4106 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4107 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4108 DestructorFn, KmpRoutineEntryPtrTy),
4109 DestructorsLV);
4110 }
4111 // Set priority.
4112 if (Data.Priority.getInt()) {
4113 LValue Data2LV = CGF.EmitLValueForField(
4114 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4115 LValue PriorityLV = CGF.EmitLValueForField(
4116 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4117 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4118 }
4119 Result.NewTask = NewTask;
4120 Result.TaskEntry = TaskEntry;
4121 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4122 Result.TDBase = TDBase;
4123 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4124 return Result;
4125 }
4127 /// Translates internal dependency kind into the runtime kind.
4128 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4129 RTLDependenceKindTy DepKind;
4130 switch (K) {
4131 case OMPC_DEPEND_in:
4132 DepKind = RTLDependenceKindTy::DepIn;
4133 break;
4134 // Out and InOut dependencies must use the same code.
4135 case OMPC_DEPEND_out:
4136 case OMPC_DEPEND_inout:
4137 DepKind = RTLDependenceKindTy::DepInOut;
4138 break;
4139 case OMPC_DEPEND_mutexinoutset:
4140 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4141 break;
4142 case OMPC_DEPEND_inoutset:
4143 DepKind = RTLDependenceKindTy::DepInOutSet;
4144 break;
4145 case OMPC_DEPEND_outallmemory:
4146 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4147 break;
4148 case OMPC_DEPEND_source:
4149 case OMPC_DEPEND_sink:
4150 case OMPC_DEPEND_depobj:
4151 case OMPC_DEPEND_inoutallmemory:
4152 case OMPC_DEPEND_unknown:
4153 llvm_unreachable("Unknown task dependence type");
4154 }
4155 return DepKind;
4156 }
4158 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4159 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4160 QualType &FlagsTy) {
4161 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4162 if (KmpDependInfoTy.isNull()) {
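// Build struct kmp_depend_info {
//   intptr_t base_addr;
//   size_t len;
//   <unsigned integer of boolean width> flags;
// };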
4163 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4164 KmpDependInfoRD->startDefinition();
4165 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4166 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4167 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4168 KmpDependInfoRD->completeDefinition();
4169 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4170 }
4171 }
4173 std::pair<llvm::Value *, LValue>
4174 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4175 SourceLocation Loc) {
4176 ASTContext &C = CGM.getContext();
4177 QualType FlagsTy;
4178 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4179 RecordDecl *KmpDependInfoRD =
4180 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4181 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4182 LValue Base = CGF.EmitLoadOfPointerLValue(
4183 DepobjLVal.getAddress(CGF).withElementType(
4184 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4185 KmpDependInfoPtrTy->castAs<PointerType>());
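// The element just before the depobj array (index -1) stores the number of
// dependencies in its base_addr field.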
4186 Address DepObjAddr = CGF.Builder.CreateGEP(
4187 Base.getAddress(CGF),
4188 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4189 LValue NumDepsBase = CGF.MakeAddrLValue(
4190 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4191 // NumDeps = deps[-1].base_addr;
4192 LValue BaseAddrLVal = CGF.EmitLValueForField(
4193 NumDepsBase,
4194 *std::next(KmpDependInfoRD->field_begin(),
4195 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4196 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4197 return std::make_pair(NumDeps, Base);
4198 }
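/// Fills one kmp_depend_info record (base_addr, len, flags) per dependence
/// expression in \p Data, advancing \p Pos, which is either a compile-time
/// index or a runtime counter lvalue when iterator modifiers are involved.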
4200 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4201 llvm::PointerUnion<unsigned *, LValue *> Pos,
4202 const OMPTaskDataTy::DependData &Data,
4203 Address DependenciesArray) {
4204 CodeGenModule &CGM = CGF.CGM;
4205 ASTContext &C = CGM.getContext();
4206 QualType FlagsTy;
4207 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4208 RecordDecl *KmpDependInfoRD =
4209 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4210 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4212 OMPIteratorGeneratorScope IteratorScope(
4213 CGF, cast_or_null<OMPIteratorExpr>(
4214 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4215 : nullptr));
4216 for (const Expr *E : Data.DepExprs) {
4217 llvm::Value *Addr;
4218 llvm::Value *Size;
4220 // The expression will be a nullptr in the 'omp_all_memory' case.
4221 if (E) {
4222 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4223 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4224 } else {
4225 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4226 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4227 }
4228 LValue Base;
4229 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4230 Base = CGF.MakeAddrLValue(
4231 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4232 } else {
4233 assert(E && "Expected a non-null expression");
4234 LValue &PosLVal = *Pos.get<LValue *>();
4235 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4236 Base = CGF.MakeAddrLValue(
4237 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4238 }
4239 // deps[i].base_addr = &<Dependencies[i].second>;
4240 LValue BaseAddrLVal = CGF.EmitLValueForField(
4241 Base,
4242 *std::next(KmpDependInfoRD->field_begin(),
4243 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4244 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4245 // deps[i].len = sizeof(<Dependencies[i].second>);
4246 LValue LenLVal = CGF.EmitLValueForField(
4247 Base, *std::next(KmpDependInfoRD->field_begin(),
4248 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4249 CGF.EmitStoreOfScalar(Size, LenLVal);
4250 // deps[i].flags = <Dependencies[i].first>;
4251 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4252 LValue FlagsLVal = CGF.EmitLValueForField(
4253 Base,
4254 *std::next(KmpDependInfoRD->field_begin(),
4255 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4256 CGF.EmitStoreOfScalar(
4257 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4258 FlagsLVal);
4259 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4260 ++(*P);
4261 } else {
4262 LValue &PosLVal = *Pos.get<LValue *>();
4263 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4264 Idx = CGF.Builder.CreateNUWAdd(Idx,
4265 llvm::ConstantInt::get(Idx->getType(), 1));
4266 CGF.EmitStoreOfScalar(Idx, PosLVal);
4267 }
4268 }
4269 }
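// E.g., for `depend(in: x)` with `int x`, the element emitted above is,
// roughly, { (intptr_t)&x, /*len=*/sizeof(x), /*flags=*/DepIn }.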
4271 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4272 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4273 const OMPTaskDataTy::DependData &Data) {
4274 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4275 "Expected depobj dependency kind.");
4276 SmallVector<llvm::Value *, 4> Sizes;
4277 SmallVector<LValue, 4> SizeLVals;
4278 ASTContext &C = CGF.getContext();
4280 OMPIteratorGeneratorScope IteratorScope(
4281 CGF, cast_or_null<OMPIteratorExpr>(
4282 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4283 : nullptr));
4284 for (const Expr *E : Data.DepExprs) {
4285 llvm::Value *NumDeps;
4286 LValue Base;
4287 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4288 std::tie(NumDeps, Base) =
4289 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4290 LValue NumLVal = CGF.MakeAddrLValue(
4291 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4292 C.getUIntPtrType());
4293 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4294 NumLVal.getAddress(CGF));
4295 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4296 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4297 CGF.EmitStoreOfScalar(Add, NumLVal);
4298 SizeLVals.push_back(NumLVal);
4299 }
4301 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4302 llvm::Value *Size =
4303 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4304 Sizes.push_back(Size);
4305 }
4306 return Sizes;
4307 }
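// Each returned value is the total number of dependence elements contributed
// by the corresponding depobj expression of the clause; the counts are
// accumulated into stack temporaries first and only then loaded back, so the
// resulting scalars remain usable after the iterator scope above is exited.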
4309 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4310 QualType &KmpDependInfoTy,
4311 LValue PosLVal,
4312 const OMPTaskDataTy::DependData &Data,
4313 Address DependenciesArray) {
4314 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4315 "Expected depobj dependency kind.");
4316 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4318 OMPIteratorGeneratorScope IteratorScope(
4319 CGF, cast_or_null<OMPIteratorExpr>(
4320 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4321 : nullptr));
4322 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4323 const Expr *E = Data.DepExprs[I];
4324 llvm::Value *NumDeps;
4325 LValue Base;
4326 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4327 std::tie(NumDeps, Base) =
4328 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4330 // Memcpy the dependency data.
4331 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4332 ElSize,
4333 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4334 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4335 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4336 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4338 // Advance the position by the number of elements just copied.
4339 // pos += NumDeps;
4340 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4341 CGF.EmitStoreOfScalar(Add, PosLVal);
4342 }
4343 }
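// Net effect of the loop above, in pseudo-C (illustrative):
//
//   for (each depobj expression e in the clause) {
//     n = <element count stored in front of e's array>;
//     memcpy(&DependenciesArray[pos], e, n * sizeof(kmp_depend_info));
//     pos += n;
//   }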
4346 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4347 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4348 SourceLocation Loc) {
4349 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4350 return D.DepExprs.empty();
4351 }))
4352 return std::make_pair(nullptr, Address::invalid());
4353 // Process list of dependencies.
4354 ASTContext &C = CGM.getContext();
4355 Address DependenciesArray = Address::invalid();
4356 llvm::Value *NumOfElements = nullptr;
4357 unsigned NumDependencies = std::accumulate(
4358 Dependencies.begin(), Dependencies.end(), 0,
4359 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4360 return D.DepKind == OMPC_DEPEND_depobj
4361 ? V
4362 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4363 });
4364 QualType FlagsTy;
4365 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4366 bool HasDepobjDeps = false;
4367 bool HasRegularWithIterators = false;
4368 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4369 llvm::Value *NumOfRegularWithIterators =
4370 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4371 // Calculate number of depobj dependencies and regular deps with the
4372 // iterators.
4373 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4374 if (D.DepKind == OMPC_DEPEND_depobj) {
4375 SmallVector<llvm::Value *, 4> Sizes =
4376 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4377 for (llvm::Value *Size : Sizes) {
4378 NumOfDepobjElements =
4379 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4380 }
4381 HasDepobjDeps = true;
4382 continue;
4383 }
4384 // Include number of iterations, if any.
4386 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4387 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4388 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4389 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4390 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4391 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4392 NumOfRegularWithIterators =
4393 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4394 }
4395 HasRegularWithIterators = true;
4396 continue;
4397 }
4398 }
4400 QualType KmpDependInfoArrayTy;
4401 if (HasDepobjDeps || HasRegularWithIterators) {
4402 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4403 /*isSigned=*/false);
4404 if (HasDepobjDeps) {
4405 NumOfElements =
4406 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4407 }
4408 if (HasRegularWithIterators) {
4409 NumOfElements =
4410 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4411 }
4412 auto *OVE = new (C) OpaqueValueExpr(
4413 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4414 VK_PRValue);
4415 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4416 RValue::get(NumOfElements));
4417 KmpDependInfoArrayTy =
4418 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4419 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4420 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4421 // Properly emit variable-sized array.
4422 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4423 ImplicitParamDecl::Other);
4424 CGF.EmitVarDecl(*PD);
4425 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4426 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4427 /*isSigned=*/false);
4428 } else {
4429 KmpDependInfoArrayTy = C.getConstantArrayType(
4430 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4431 ArrayType::Normal, /*IndexTypeQuals=*/0);
4432 DependenciesArray =
4433 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4434 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4435 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4436 /*isSigned=*/false);
4437 }
4438 unsigned Pos = 0;
4439 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4440 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4441 Dependencies[I].IteratorExpr)
4442 continue;
4443 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4444 DependenciesArray);
4445 }
4446 // Copy regular dependencies with iterators.
4447 LValue PosLVal = CGF.MakeAddrLValue(
4448 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4449 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4450 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4451 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4452 !Dependencies[I].IteratorExpr)
4453 continue;
4454 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4455 DependenciesArray);
4456 }
4457 // Copy final depobj arrays without iterators.
4458 if (HasDepobjDeps) {
4459 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4460 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4461 continue;
4462 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4463 DependenciesArray);
4464 }
4465 }
4466 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4467 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4468 return std::make_pair(NumOfElements, DependenciesArray);
4469 }
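// Sizing example (illustrative): for
//   depend(in: a, b) depend(iterator(i=0:4), out: p[i]) depend(depobj: d)
// NumDependencies covers only {a, b}; the iterator clause contributes
// roughly 4 * 1 elements and the depobj clause its stored count, both known
// only at run time, so the array is emitted as a VLA rather than as a
// constant-sized array.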
4471 Address CGOpenMPRuntime::emitDepobjDependClause(
4472 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4473 SourceLocation Loc) {
4474 if (Dependencies.DepExprs.empty())
4475 return Address::invalid();
4476 // Process list of dependencies.
4477 ASTContext &C = CGM.getContext();
4478 Address DependenciesArray = Address::invalid();
4479 unsigned NumDependencies = Dependencies.DepExprs.size();
4480 QualType FlagsTy;
4481 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4482 RecordDecl *KmpDependInfoRD =
4483 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4485 llvm::Value *Size;
4486 // Define type kmp_depend_info[<Dependencies.size()>];
4487 // For depobj reserve one extra element to store the number of elements.
4488 // It is required to handle the depobj(x) update(in) construct.
4489 // kmp_depend_info[<Dependencies.size()>] deps;
4490 llvm::Value *NumDepsVal;
4491 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4492 if (const auto *IE =
4493 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4494 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4495 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4496 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4497 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4498 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4499 }
4500 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4501 NumDepsVal);
4502 CharUnits SizeInBytes =
4503 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4504 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4505 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4506 NumDepsVal =
4507 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4508 } else {
4509 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4510 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4511 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4512 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4513 Size = CGM.getSize(Sz.alignTo(Align));
4514 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4515 }
4516 // Need to allocate in dynamic memory.
4517 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4518 // Use default allocator.
4519 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4520 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4522 llvm::Value *Addr =
4523 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4524 CGM.getModule(), OMPRTL___kmpc_alloc),
4525 Args, ".dep.arr.addr");
4526 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4527 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4528 Addr, KmpDependInfoLlvmTy->getPointerTo());
4529 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4530 // Write the number of elements into the first element of the array for depobj.
4531 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4532 // deps[i].base_addr = NumDependencies;
4533 LValue BaseAddrLVal = CGF.EmitLValueForField(
4534 Base,
4535 *std::next(KmpDependInfoRD->field_begin(),
4536 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4537 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4538 llvm::PointerUnion<unsigned *, LValue *> Pos;
4539 unsigned Idx = 1;
4540 LValue PosLVal;
4541 if (Dependencies.IteratorExpr) {
4542 PosLVal = CGF.MakeAddrLValue(
4543 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4544 C.getSizeType());
4545 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4546 /*IsInit=*/true);
4547 Pos = &PosLVal;
4548 } else {
4549 Pos = &Idx;
4550 }
4551 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4552 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4553 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4554 CGF.Int8Ty);
4555 return DependenciesArray;
4556 }
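// Note that the returned address points at element 1 of the allocation;
// element 0 holds the dependency count consumed by getDepobjElements(), so
// users of the resulting omp_depend_t never observe it.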
4558 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4559 SourceLocation Loc) {
4560 ASTContext &C = CGM.getContext();
4561 QualType FlagsTy;
4562 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4563 LValue Base = CGF.EmitLoadOfPointerLValue(
4564 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4565 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4566 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4567 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4568 CGF.ConvertTypeForMem(KmpDependInfoTy));
4569 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4570 Addr.getElementType(), Addr.getPointer(),
4571 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4572 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4573 CGF.VoidPtrTy);
4574 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4575 // Use default allocator.
4576 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4577 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4579 // __kmpc_free(gtid, addr, nullptr);
4580 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4581 CGM.getModule(), OMPRTL___kmpc_free),
4582 Args);
4583 }
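// I.e., the emitted call releases the allocation starting at the hidden size
// element: __kmpc_free(gtid, depobj_handle - 1, /*allocator=*/nullptr).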
4585 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4586 OpenMPDependClauseKind NewDepKind,
4587 SourceLocation Loc) {
4588 ASTContext &C = CGM.getContext();
4589 QualType FlagsTy;
4590 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4591 RecordDecl *KmpDependInfoRD =
4592 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4593 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4594 llvm::Value *NumDeps;
4595 LValue Base;
4596 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4598 Address Begin = Base.getAddress(CGF);
4599 // Cast from pointer to array type to pointer to single element.
4600 llvm::Value *End = CGF.Builder.CreateGEP(
4601 Begin.getElementType(), Begin.getPointer(), NumDeps);
4602 // The basic structure here is a while-do loop.
4603 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4604 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4605 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4606 CGF.EmitBlock(BodyBB);
4607 llvm::PHINode *ElementPHI =
4608 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4609 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4610 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4611 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4612 Base.getTBAAInfo());
4613 // deps[i].flags = NewDepKind;
4614 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4615 LValue FlagsLVal = CGF.EmitLValueForField(
4616 Base, *std::next(KmpDependInfoRD->field_begin(),
4617 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4618 CGF.EmitStoreOfScalar(
4619 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4620 FlagsLVal);
4622 // Shift the address forward by one element.
4623 Address ElementNext =
4624 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4625 ElementPHI->addIncoming(ElementNext.getPointer(),
4626 CGF.Builder.GetInsertBlock());
4627 llvm::Value *IsEmpty =
4628 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4629 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4630 // Done.
4631 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4632 }
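// The emitted control flow is roughly equivalent to:
//
//   for (kmp_depend_info *p = base; p != base + num_deps; ++p)
//     p->flags = <translated NewDepKind>;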
4634 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4635 const OMPExecutableDirective &D,
4636 llvm::Function *TaskFunction,
4637 QualType SharedsTy, Address Shareds,
4638 const Expr *IfCond,
4639 const OMPTaskDataTy &Data) {
4640 if (!CGF.HaveInsertPoint())
4641 return;
4643 TaskResultTy Result =
4644 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4645 llvm::Value *NewTask = Result.NewTask;
4646 llvm::Function *TaskEntry = Result.TaskEntry;
4647 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4648 LValue TDBase = Result.TDBase;
4649 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4650 // Process list of dependences.
4651 Address DependenciesArray = Address::invalid();
4652 llvm::Value *NumOfElements;
4653 std::tie(NumOfElements, DependenciesArray) =
4654 emitDependClause(CGF, Data.Dependences, Loc);
4656 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4657 // libcall.
4658 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4659 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4660 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if the
4661 // dependence list is not empty.
4662 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4663 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4664 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4665 llvm::Value *DepTaskArgs[7];
4666 if (!Data.Dependences.empty()) {
4667 DepTaskArgs[0] = UpLoc;
4668 DepTaskArgs[1] = ThreadID;
4669 DepTaskArgs[2] = NewTask;
4670 DepTaskArgs[3] = NumOfElements;
4671 DepTaskArgs[4] = DependenciesArray.getPointer();
4672 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4673 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4674 }
4675 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4676 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4677 if (!Data.Tied) {
4678 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4679 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4680 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4681 }
4682 if (!Data.Dependences.empty()) {
4683 CGF.EmitRuntimeCall(
4684 OMPBuilder.getOrCreateRuntimeFunction(
4685 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4686 DepTaskArgs);
4687 } else {
4688 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4689 CGM.getModule(), OMPRTL___kmpc_omp_task),
4690 TaskArgs);
4691 }
4692 // Check if the parent region is untied and build a return for the untied task.
4693 if (auto *Region =
4694 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4695 Region->emitUntiedSwitch(CGF);
4696 };
4698 llvm::Value *DepWaitTaskArgs[7];
4699 if (!Data.Dependences.empty()) {
4700 DepWaitTaskArgs[0] = UpLoc;
4701 DepWaitTaskArgs[1] = ThreadID;
4702 DepWaitTaskArgs[2] = NumOfElements;
4703 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4704 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4705 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4706 DepWaitTaskArgs[6] =
4707 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4708 }
4709 auto &M = CGM.getModule();
4710 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4711 TaskEntry, &Data, &DepWaitTaskArgs,
4712 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4713 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4714 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4715 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4716 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4717 // is specified.
4718 if (!Data.Dependences.empty())
4719 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4720 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4721 DepWaitTaskArgs);
4722 // Call proxy_task_entry(gtid, new_task);
4723 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4724 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4725 Action.Enter(CGF);
4726 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4727 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4728 OutlinedFnArgs);
4729 };
4731 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4732 // kmp_task_t *new_task);
4733 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4734 // kmp_task_t *new_task);
4735 RegionCodeGenTy RCG(CodeGen);
4736 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4737 M, OMPRTL___kmpc_omp_task_begin_if0),
4738 TaskArgs,
4739 OMPBuilder.getOrCreateRuntimeFunction(
4740 M, OMPRTL___kmpc_omp_task_complete_if0),
4741 TaskArgs);
4742 RCG.setAction(Action);
4743 RCG(CGF);
4744 };
4746 if (IfCond) {
4747 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4748 } else {
4749 RegionCodeGenTy ThenRCG(ThenCodeGen);
4750 ThenRCG(CGF);
4751 }
4752 }
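// Overall shape of the emitted code (sketch):
//
//   if (<IfCond>) { // ThenCodeGen; emitted unconditionally without if-clause
//     __kmpc_omp_task_with_deps(...);   // or __kmpc_omp_task(...)
//   } else {        // ElseCodeGen: execute the task body in place
//     __kmpc_omp_taskwait_deps_51(...); // only with dependences
//     __kmpc_omp_task_begin_if0(...);
//     proxy_task_entry(<gtid>, <new_task>);
//     __kmpc_omp_task_complete_if0(...);
//   }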
4754 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4755 const OMPLoopDirective &D,
4756 llvm::Function *TaskFunction,
4757 QualType SharedsTy, Address Shareds,
4758 const Expr *IfCond,
4759 const OMPTaskDataTy &Data) {
4760 if (!CGF.HaveInsertPoint())
4761 return;
4762 TaskResultTy Result =
4763 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4764 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4765 // libcall.
4766 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4767 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4768 // sched, kmp_uint64 grainsize, void *task_dup);
4769 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4770 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4771 llvm::Value *IfVal;
4772 if (IfCond) {
4773 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4774 /*isSigned=*/true);
4775 } else {
4776 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4777 }
4779 LValue LBLVal = CGF.EmitLValueForField(
4780 Result.TDBase,
4781 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4782 const auto *LBVar =
4783 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4784 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4785 LBLVal.getQuals(),
4786 /*IsInitializer=*/true);
4787 LValue UBLVal = CGF.EmitLValueForField(
4788 Result.TDBase,
4789 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4790 const auto *UBVar =
4791 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4792 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4793 UBLVal.getQuals(),
4794 /*IsInitializer=*/true);
4795 LValue StLVal = CGF.EmitLValueForField(
4796 Result.TDBase,
4797 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4798 const auto *StVar =
4799 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4800 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4801 StLVal.getQuals(),
4802 /*IsInitializer=*/true);
4803 // Store reductions address.
4804 LValue RedLVal = CGF.EmitLValueForField(
4805 Result.TDBase,
4806 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4807 if (Data.Reductions) {
4808 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4809 } else {
4810 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4811 CGF.getContext().VoidPtrTy);
4812 }
4813 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4814 llvm::Value *TaskArgs[] = {
4815 UpLoc,
4816 ThreadID,
4817 Result.NewTask,
4818 IfVal,
4819 LBLVal.getPointer(CGF),
4820 UBLVal.getPointer(CGF),
4821 CGF.EmitLoadOfScalar(StLVal, Loc),
4822 llvm::ConstantInt::getSigned(
4823 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4824 llvm::ConstantInt::getSigned(
4825 CGF.IntTy, Data.Schedule.getPointer()
4826 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4827 : NoSchedule),
4828 Data.Schedule.getPointer()
4829 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4830 /*isSigned=*/false)
4831 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4832 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4833 Result.TaskDupFn, CGF.VoidPtrTy)
4834 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4835 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4836 CGM.getModule(), OMPRTL___kmpc_taskloop),
4837 TaskArgs);
4838 }
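// The `sched` argument above encodes the schedule clause: 0 for none, 1 for
// grainsize, 2 for num_tasks; the following kmp_uint64 argument carries the
// clause operand (or 0 when no schedule clause is present).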
4840 /// Emit reduction operation for each element of array (required for
4841 /// array sections) LHS op = RHS.
4842 /// \param Type Type of array.
4843 /// \param LHSVar Variable on the left side of the reduction operation
4844 /// (references element of array in original variable).
4845 /// \param RHSVar Variable on the right side of the reduction operation
4846 /// (references element of array in original variable).
4847 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4848 /// RHSVar.
4849 static void EmitOMPAggregateReduction(
4850 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4851 const VarDecl *RHSVar,
4852 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4853 const Expr *, const Expr *)> &RedOpGen,
4854 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4855 const Expr *UpExpr = nullptr) {
4856 // Perform element-by-element initialization.
4857 QualType ElementTy;
4858 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4859 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4861 // Drill down to the base element type on both arrays.
4862 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4863 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4865 llvm::Value *RHSBegin = RHSAddr.getPointer();
4866 llvm::Value *LHSBegin = LHSAddr.getPointer();
4867 // Cast from pointer to array type to pointer to single element.
4868 llvm::Value *LHSEnd =
4869 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4870 // The basic structure here is a while-do loop.
4871 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4872 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4873 llvm::Value *IsEmpty =
4874 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4875 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4877 // Enter the loop body, making that address the current address.
4878 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4879 CGF.EmitBlock(BodyBB);
4881 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4883 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4884 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4885 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4886 Address RHSElementCurrent(
4887 RHSElementPHI, RHSAddr.getElementType(),
4888 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4890 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4891 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4892 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4893 Address LHSElementCurrent(
4894 LHSElementPHI, LHSAddr.getElementType(),
4895 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4897 // Emit copy.
4898 CodeGenFunction::OMPPrivateScope Scope(CGF);
4899 Scope.addPrivate(LHSVar, LHSElementCurrent);
4900 Scope.addPrivate(RHSVar, RHSElementCurrent);
4901 Scope.Privatize();
4902 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4903 Scope.ForceCleanup();
4905 // Shift the address forward by one element.
4906 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4907 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4908 "omp.arraycpy.dest.element");
4909 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4910 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4911 "omp.arraycpy.src.element");
4912 // Check whether we've reached the end.
4913 llvm::Value *Done =
4914 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4915 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4916 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4917 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4919 // Done.
4920 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4921 }
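// The generated loop corresponds, element by element, to (sketch):
//
//   for (size_t i = 0; i < NumElements; ++i)
//     lhs[i] = RedOp(lhs[i], rhs[i]);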
4923 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4924 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4925 /// UDR combiner function.
4926 static void emitReductionCombiner(CodeGenFunction &CGF,
4927 const Expr *ReductionOp) {
4928 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4929 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4930 if (const auto *DRE =
4931 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4932 if (const auto *DRD =
4933 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4934 std::pair<llvm::Function *, llvm::Function *> Reduction =
4935 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4936 RValue Func = RValue::get(Reduction.first);
4937 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4938 CGF.EmitIgnoredExpr(ReductionOp);
4939 return;
4940 }
4941 CGF.EmitIgnoredExpr(ReductionOp);
4942 }
4944 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4945 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4946 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4947 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4948 ASTContext &C = CGM.getContext();
4950 // void reduction_func(void *LHSArg, void *RHSArg);
4951 FunctionArgList Args;
4952 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4953 ImplicitParamDecl::Other);
4954 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4955 ImplicitParamDecl::Other);
4956 Args.push_back(&LHSArg);
4957 Args.push_back(&RHSArg);
4958 const auto &CGFI =
4959 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4960 std::string Name = getReductionFuncName(ReducerName);
4961 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4962 llvm::GlobalValue::InternalLinkage, Name,
4963 &CGM.getModule());
4964 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4965 Fn->setDoesNotRecurse();
4966 CodeGenFunction CGF(CGM);
4967 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4969 // Dst = (void*[n])(LHSArg);
4970 // Src = (void*[n])(RHSArg);
4971 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4972 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4973 ArgsElemType->getPointerTo()),
4974 ArgsElemType, CGF.getPointerAlign());
4975 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4976 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4977 ArgsElemType->getPointerTo()),
4978 ArgsElemType, CGF.getPointerAlign());
4980 // ...
4981 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4982 // ...
4983 CodeGenFunction::OMPPrivateScope Scope(CGF);
4984 const auto *IPriv = Privates.begin();
4985 unsigned Idx = 0;
4986 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4987 const auto *RHSVar =
4988 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4989 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4990 const auto *LHSVar =
4991 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4992 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4993 QualType PrivTy = (*IPriv)->getType();
4994 if (PrivTy->isVariablyModifiedType()) {
4995 // Get array size and emit VLA type.
4996 ++Idx;
4997 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4998 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4999 const VariableArrayType *VLA =
5000 CGF.getContext().getAsVariableArrayType(PrivTy);
5001 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5002 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5003 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5004 CGF.EmitVariablyModifiedType(PrivTy);
5005 }
5006 }
5007 Scope.Privatize();
5008 IPriv = Privates.begin();
5009 const auto *ILHS = LHSExprs.begin();
5010 const auto *IRHS = RHSExprs.begin();
5011 for (const Expr *E : ReductionOps) {
5012 if ((*IPriv)->getType()->isArrayType()) {
5013 // Emit reduction for array section.
5014 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5015 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5016 EmitOMPAggregateReduction(
5017 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5018 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5019 emitReductionCombiner(CGF, E);
5020 });
5021 } else {
5022 // Emit reduction for array subscript or single variable.
5023 emitReductionCombiner(CGF, E);
5024 }
5025 ++IPriv;
5026 ++ILHS;
5027 ++IRHS;
5028 }
5029 Scope.ForceCleanup();
5030 CGF.FinishFunction();
5031 return Fn;
5032 }
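// Note the RedList slot convention used above: a variably-sized private item
// occupies two consecutive void* slots, the element pointer followed by its
// size reinterpreted as a pointer; emitReduction() below fills the list the
// same way.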
5034 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5035 const Expr *ReductionOp,
5036 const Expr *PrivateRef,
5037 const DeclRefExpr *LHS,
5038 const DeclRefExpr *RHS) {
5039 if (PrivateRef->getType()->isArrayType()) {
5040 // Emit reduction for array section.
5041 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5042 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5043 EmitOMPAggregateReduction(
5044 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5045 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5046 emitReductionCombiner(CGF, ReductionOp);
5047 });
5048 } else {
5049 // Emit reduction for array subscript or single variable.
5050 emitReductionCombiner(CGF, ReductionOp);
5051 }
5052 }
5054 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5055 ArrayRef<const Expr *> Privates,
5056 ArrayRef<const Expr *> LHSExprs,
5057 ArrayRef<const Expr *> RHSExprs,
5058 ArrayRef<const Expr *> ReductionOps,
5059 ReductionOptionsTy Options) {
5060 if (!CGF.HaveInsertPoint())
5061 return;
5063 bool WithNowait = Options.WithNowait;
5064 bool SimpleReduction = Options.SimpleReduction;
5066 // The following code should be emitted for the reduction:
5068 // static kmp_critical_name lock = { 0 };
5070 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5071 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5072 // ...
5073 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5074 // *(Type<n>-1*)rhs[<n>-1]);
5075 // }
5077 // ...
5078 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5079 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5080 // RedList, reduce_func, &<lock>)) {
5081 // case 1:
5082 // ...
5083 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5084 // ...
5085 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5086 // break;
5087 // case 2:
5088 // ...
5089 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5090 // ...
5091 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5092 // break;
5093 // default:;
5094 // }
5096 // If SimpleReduction is true, only the following code is generated:
5097 // ...
5098 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5099 // ...
5101 ASTContext &C = CGM.getContext();
5103 if (SimpleReduction) {
5104 CodeGenFunction::RunCleanupsScope Scope(CGF);
5105 const auto *IPriv = Privates.begin();
5106 const auto *ILHS = LHSExprs.begin();
5107 const auto *IRHS = RHSExprs.begin();
5108 for (const Expr *E : ReductionOps) {
5109 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5110 cast<DeclRefExpr>(*IRHS));
5111 ++IPriv;
5112 ++ILHS;
5113 ++IRHS;
5114 }
5115 return;
5116 }
5118 // 1. Build a list of reduction variables.
5119 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5120 auto Size = RHSExprs.size();
5121 for (const Expr *E : Privates) {
5122 if (E->getType()->isVariablyModifiedType())
5123 // Reserve a slot for the array size.
5124 ++Size;
5125 }
5126 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5127 QualType ReductionArrayTy =
5128 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5129 /*IndexTypeQuals=*/0);
5130 Address ReductionList =
5131 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5132 const auto *IPriv = Privates.begin();
5133 unsigned Idx = 0;
5134 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5135 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5136 CGF.Builder.CreateStore(
5137 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5138 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5139 Elem);
5140 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5141 // Store array size.
5142 ++Idx;
5143 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5144 llvm::Value *Size = CGF.Builder.CreateIntCast(
5145 CGF.getVLASize(
5146 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5147 .NumElts,
5148 CGF.SizeTy, /*isSigned=*/false);
5149 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5150 Elem);
5151 }
5152 }
5154 // 2. Emit reduce_func().
5155 llvm::Function *ReductionFn = emitReductionFunction(
5156 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5157 Privates, LHSExprs, RHSExprs, ReductionOps);
5159 // 3. Create static kmp_critical_name lock = { 0 };
5160 std::string Name = getName({"reduction"});
5161 llvm::Value *Lock = getCriticalRegionLock(Name);
5163 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5164 // RedList, reduce_func, &<lock>);
5165 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5166 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5167 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5168 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5169 ReductionList.getPointer(), CGF.VoidPtrTy);
5170 llvm::Value *Args[] = {
5171 IdentTLoc, // ident_t *<loc>
5172 ThreadId, // i32 <gtid>
5173 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5174 ReductionArrayTySize, // size_type sizeof(RedList)
5175 RL, // void *RedList
5176 ReductionFn, // void (*) (void *, void *) <reduce_func>
5177 Lock // kmp_critical_name *&<lock>
5178 };
5179 llvm::Value *Res = CGF.EmitRuntimeCall(
5180 OMPBuilder.getOrCreateRuntimeFunction(
5181 CGM.getModule(),
5182 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5183 Args);
5185 // 5. Build switch(res)
5186 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5187 llvm::SwitchInst *SwInst =
5188 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5190 // 6. Build case 1:
5191 // ...
5192 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5193 // ...
5194 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5195 // break;
5196 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5197 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5198 CGF.EmitBlock(Case1BB);
5200 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5201 llvm::Value *EndArgs[] = {
5202 IdentTLoc, // ident_t *<loc>
5203 ThreadId, // i32 <gtid>
5204 Lock // kmp_critical_name *&<lock>
5205 };
5206 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5207 CodeGenFunction &CGF, PrePostActionTy &Action) {
5208 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5209 const auto *IPriv = Privates.begin();
5210 const auto *ILHS = LHSExprs.begin();
5211 const auto *IRHS = RHSExprs.begin();
5212 for (const Expr *E : ReductionOps) {
5213 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5214 cast<DeclRefExpr>(*IRHS));
5215 ++IPriv;
5216 ++ILHS;
5217 ++IRHS;
5218 }
5219 };
5220 RegionCodeGenTy RCG(CodeGen);
5221 CommonActionTy Action(
5222 nullptr, std::nullopt,
5223 OMPBuilder.getOrCreateRuntimeFunction(
5224 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5225 : OMPRTL___kmpc_end_reduce),
5226 EndArgs);
5227 RCG.setAction(Action);
5228 RCG(CGF);
5230 CGF.EmitBranch(DefaultBB);
5232 // 7. Build case 2:
5233 // ...
5234 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5235 // ...
5236 // break;
5237 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5238 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5239 CGF.EmitBlock(Case2BB);
5241 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5242 CodeGenFunction &CGF, PrePostActionTy &Action) {
5243 const auto *ILHS = LHSExprs.begin();
5244 const auto *IRHS = RHSExprs.begin();
5245 const auto *IPriv = Privates.begin();
5246 for (const Expr *E : ReductionOps) {
5247 const Expr *XExpr = nullptr;
5248 const Expr *EExpr = nullptr;
5249 const Expr *UpExpr = nullptr;
5250 BinaryOperatorKind BO = BO_Comma;
5251 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5252 if (BO->getOpcode() == BO_Assign) {
5253 XExpr = BO->getLHS();
5254 UpExpr = BO->getRHS();
5255 }
5256 }
5257 // Try to emit update expression as a simple atomic.
5258 const Expr *RHSExpr = UpExpr;
5259 if (RHSExpr) {
5260 // Analyze RHS part of the whole expression.
5261 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5262 RHSExpr->IgnoreParenImpCasts())) {
5263 // If this is a conditional operator, analyze its condition for
5264 // min/max reduction operator.
5265 RHSExpr = ACO->getCond();
5266 }
5267 if (const auto *BORHS =
5268 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5269 EExpr = BORHS->getRHS();
5270 BO = BORHS->getOpcode();
5271 }
5272 }
5273 if (XExpr) {
5274 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5275 auto &&AtomicRedGen = [BO, VD,
5276 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5277 const Expr *EExpr, const Expr *UpExpr) {
5278 LValue X = CGF.EmitLValue(XExpr);
5279 RValue E;
5280 if (EExpr)
5281 E = CGF.EmitAnyExpr(EExpr);
5282 CGF.EmitOMPAtomicSimpleUpdateExpr(
5283 X, E, BO, /*IsXLHSInRHSPart=*/true,
5284 llvm::AtomicOrdering::Monotonic, Loc,
5285 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5286 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5287 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5288 CGF.emitOMPSimpleStore(
5289 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5290 VD->getType().getNonReferenceType(), Loc);
5291 PrivateScope.addPrivate(VD, LHSTemp);
5292 (void)PrivateScope.Privatize();
5293 return CGF.EmitAnyExpr(UpExpr);
5294 });
5295 };
5296 if ((*IPriv)->getType()->isArrayType()) {
5297 // Emit atomic reduction for array section.
5298 const auto *RHSVar =
5299 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5300 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5301 AtomicRedGen, XExpr, EExpr, UpExpr);
5302 } else {
5303 // Emit atomic reduction for array subscript or single variable.
5304 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5305 }
5306 } else {
5307 // Emit as a critical region.
5308 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5309 const Expr *, const Expr *) {
5310 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5311 std::string Name = RT.getName({"atomic_reduction"});
5312 RT.emitCriticalRegion(
5313 CGF, Name,
5314 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5315 Action.Enter(CGF);
5316 emitReductionCombiner(CGF, E);
5317 },
5318 Loc);
5319 };
5320 if ((*IPriv)->getType()->isArrayType()) {
5321 const auto *LHSVar =
5322 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5323 const auto *RHSVar =
5324 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5325 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5326 CritRedGen);
5327 } else {
5328 CritRedGen(CGF, nullptr, nullptr, nullptr);
5329 }
5330 }
5331 ++ILHS;
5332 ++IRHS;
5333 ++IPriv;
5334 }
5335 };
5336 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5337 if (!WithNowait) {
5338 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5339 llvm::Value *EndArgs[] = {
5340 IdentTLoc, // ident_t *<loc>
5341 ThreadId, // i32 <gtid>
5342 Lock // kmp_critical_name *&<lock>
5343 };
5344 CommonActionTy Action(nullptr, std::nullopt,
5345 OMPBuilder.getOrCreateRuntimeFunction(
5346 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5347 EndArgs);
5348 AtomicRCG.setAction(Action);
5349 AtomicRCG(CGF);
5350 } else {
5351 AtomicRCG(CGF);
5352 }
5354 CGF.EmitBranch(DefaultBB);
5355 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5356 }
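// The dispatch above follows the __kmpc_reduce{_nowait} contract: a result
// of 1 selects the direct combine path (case 1), 2 selects the atomic path
// (case 2), and any other value means this thread takes no part in the final
// combine.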
5358 /// Generates unique name for artificial threadprivate variables.
5359 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5360 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5361 const Expr *Ref) {
5362 SmallString<256> Buffer;
5363 llvm::raw_svector_ostream Out(Buffer);
5364 const clang::DeclRefExpr *DE;
5365 const VarDecl *D = ::getBaseDecl(Ref, DE);
5366 if (!D)
5367 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5368 D = D->getCanonicalDecl();
5369 std::string Name = CGM.getOpenMPRuntime().getName(
5370 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5371 Out << Prefix << Name << "_"
5372 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5373 return std::string(Out.str());
5374 }
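// E.g., for Prefix "reduction_size" and a local variable `a`, this produces
// a name of the form "reduction_size.a_<raw-begin-loc>" (modulo the exact
// separators getName() inserts).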
5376 /// Emits reduction initializer function:
5377 /// \code
5378 /// void @.red_init(void* %arg, void* %orig) {
5379 /// %0 = bitcast void* %arg to <type>*
5380 /// store <type> <init>, <type>* %0
5381 /// ret void
5382 /// }
5383 /// \endcode
5384 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5385 SourceLocation Loc,
5386 ReductionCodeGen &RCG, unsigned N) {
5387 ASTContext &C = CGM.getContext();
5388 QualType VoidPtrTy = C.VoidPtrTy;
5389 VoidPtrTy.addRestrict();
5390 FunctionArgList Args;
5391 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5392 ImplicitParamDecl::Other);
5393 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5394 ImplicitParamDecl::Other);
5395 Args.emplace_back(&Param);
5396 Args.emplace_back(&ParamOrig);
5397 const auto &FnInfo =
5398 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5399 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5400 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5401 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5402 Name, &CGM.getModule());
5403 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5404 Fn->setDoesNotRecurse();
5405 CodeGenFunction CGF(CGM);
5406 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5407 QualType PrivateType = RCG.getPrivateType(N);
5408 Address PrivateAddr = CGF.EmitLoadOfPointer(
5409 CGF.GetAddrOfLocalVar(&Param).withElementType(
5410 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5411 C.getPointerType(PrivateType)->castAs<PointerType>());
5412 llvm::Value *Size = nullptr;
5413 // If the size of the reduction item is non-constant, load it from global
5414 // threadprivate variable.
5415 if (RCG.getSizes(N).second) {
5416 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5417 CGF, CGM.getContext().getSizeType(),
5418 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5419 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5420 CGM.getContext().getSizeType(), Loc);
5421 }
5422 RCG.emitAggregateType(CGF, N, Size);
5423 Address OrigAddr = Address::invalid();
5424 // If the initializer uses the initializer from the declare reduction
5425 // construct, emit a pointer to the address of the original reduction item
5426 // (required by the reduction initializer).
5427 if (RCG.usesReductionInitializer(N)) {
5428 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5429 OrigAddr = CGF.EmitLoadOfPointer(
5430 SharedAddr,
5431 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5432 }
5433 // Emit the initializer:
5434 // %0 = bitcast void* %arg to <type>*
5435 // store <type> <init>, <type>* %0
5436 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5437 [](CodeGenFunction &) { return false; });
5438 CGF.FinishFunction();
5439 return Fn;
5440 }
5442 /// Emits reduction combiner function:
5443 /// \code
5444 /// void @.red_comb(void* %arg0, void* %arg1) {
5445 /// %lhs = bitcast void* %arg0 to <type>*
5446 /// %rhs = bitcast void* %arg1 to <type>*
5447 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5448 /// store <type> %2, <type>* %lhs
5449 /// ret void
5450 /// }
5451 /// \endcode
5452 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5453 SourceLocation Loc,
5454 ReductionCodeGen &RCG, unsigned N,
5455 const Expr *ReductionOp,
5456 const Expr *LHS, const Expr *RHS,
5457 const Expr *PrivateRef) {
5458 ASTContext &C = CGM.getContext();
5459 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5460 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5461 FunctionArgList Args;
5462 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5463 C.VoidPtrTy, ImplicitParamDecl::Other);
5464 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5465 ImplicitParamDecl::Other);
5466 Args.emplace_back(&ParamInOut);
5467 Args.emplace_back(&ParamIn);
5468 const auto &FnInfo =
5469 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5470 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5471 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5472 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5473 Name, &CGM.getModule());
5474 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5475 Fn->setDoesNotRecurse();
5476 CodeGenFunction CGF(CGM);
5477 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5478 llvm::Value *Size = nullptr;
5479 // If the size of the reduction item is non-constant, load it from global
5480 // threadprivate variable.
5481 if (RCG.getSizes(N).second) {
5482 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5483 CGF, CGM.getContext().getSizeType(),
5484 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5485 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5486 CGM.getContext().getSizeType(), Loc);
5487 }
5488 RCG.emitAggregateType(CGF, N, Size);
5489 // Remap lhs and rhs variables to the addresses of the function arguments.
5490 // %lhs = bitcast void* %arg0 to <type>*
5491 // %rhs = bitcast void* %arg1 to <type>*
5492 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5493 PrivateScope.addPrivate(
5494 LHSVD,
5495 // Pull out the pointer to the variable.
5496 CGF.EmitLoadOfPointer(
5497 CGF.GetAddrOfLocalVar(&ParamInOut)
5498 .withElementType(
5499 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5500 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5501 PrivateScope.addPrivate(
5502 RHSVD,
5503 // Pull out the pointer to the variable.
5504 CGF.EmitLoadOfPointer(
5505 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5506 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5507 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5508 PrivateScope.Privatize();
5509 // Emit the combiner body:
5510 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5511 // store <type> %2, <type>* %lhs
5512 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5513 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5514 cast<DeclRefExpr>(RHS));
5515 CGF.FinishFunction();
5516 return Fn;
5517 }
5519 /// Emits reduction finalizer function:
5520 /// \code
5521 /// void @.red_fini(void* %arg) {
5522 /// %0 = bitcast void* %arg to <type>*
5523 /// <destroy>(<type>* %0)
5524 /// ret void
5525 /// }
5526 /// \endcode
5527 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5528 SourceLocation Loc,
5529 ReductionCodeGen &RCG, unsigned N) {
5530 if (!RCG.needCleanups(N))
5531 return nullptr;
5532 ASTContext &C = CGM.getContext();
5533 FunctionArgList Args;
5534 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5535 ImplicitParamDecl::Other);
5536 Args.emplace_back(&Param);
5537 const auto &FnInfo =
5538 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5539 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5540 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5541 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5542 Name, &CGM.getModule());
5543 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5544 Fn->setDoesNotRecurse();
5545 CodeGenFunction CGF(CGM);
5546 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5547 Address PrivateAddr = CGF.EmitLoadOfPointer(
5548 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5549 llvm::Value *Size = nullptr;
5550 // If the size of the reduction item is non-constant, load it from global
5551 // threadprivate variable.
5552 if (RCG.getSizes(N).second) {
5553 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5554 CGF, CGM.getContext().getSizeType(),
5555 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5556 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5557 CGM.getContext().getSizeType(), Loc);
5558 }
5559 RCG.emitAggregateType(CGF, N, Size);
5560 // Emit the finalizer body:
5561 // <destroy>(<type>* %0)
5562 RCG.emitCleanups(CGF, N, PrivateAddr);
5563 CGF.FinishFunction(Loc);
5564 return Fn;
5565 }
5567 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5568 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5569 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5570 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5571 return nullptr;
5573 // Build typedef struct:
5574 // kmp_taskred_input {
5575 // void *reduce_shar; // shared reduction item
5576 // void *reduce_orig; // original reduction item used for initialization
5577 // size_t reduce_size; // size of data item
5578 // void *reduce_init; // data initialization routine
5579 // void *reduce_fini; // data finalization routine
5580 // void *reduce_comb; // data combiner routine
5581 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5582 // } kmp_taskred_input_t;
5583 ASTContext &C = CGM.getContext();
5584 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5585 RD->startDefinition();
5586 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5587 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5588 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5589 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5590 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5591 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5592 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5593 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5594 RD->completeDefinition();
5595 QualType RDType = C.getRecordType(RD);
5596 unsigned Size = Data.ReductionVars.size();
5597 llvm::APInt ArraySize(/*numBits=*/64, Size);
5598 QualType ArrayRDType = C.getConstantArrayType(
5599 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5600 // kmp_task_red_input_t .rd_input.[Size];
5601 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5602 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5603 Data.ReductionCopies, Data.ReductionOps);
5604 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5605     // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5606 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5607 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5608 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5609 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5610 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5611 ".rd_input.gep.");
5612 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5613 // ElemLVal.reduce_shar = &Shareds[Cnt];
5614 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5615 RCG.emitSharedOrigLValue(CGF, Cnt);
5616 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5617 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5618 // ElemLVal.reduce_orig = &Origs[Cnt];
5619 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5620 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5621 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5622 RCG.emitAggregateType(CGF, Cnt);
5623 llvm::Value *SizeValInChars;
5624 llvm::Value *SizeVal;
5625 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5626     // We use delayed creation/initialization for VLAs and array sections. It
5627     // is required because the runtime does not provide a way to pass the
5628     // sizes of VLAs/array sections to the initializer/combiner/finalizer
5629     // functions. Instead, threadprivate global variables are used to store
5630     // these values, and the functions read them from there.
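    // E.g. (sketch, hypothetical names): for
    //   int n = ...; int a[n];
    //   #pragma omp taskgroup task_reduction(+ : a[0:n])
    // the element count 'n' is only known at run time, so the byte size is
    // stored in an artificial threadprivate variable (emitTaskReductionFixups)
    // and reloaded inside the generated init/fini helpers shown earlier.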
5631 bool DelayedCreation = !!SizeVal;
5632 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5633 /*isSigned=*/false);
5634 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5635 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5636 // ElemLVal.reduce_init = init;
5637 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5638 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5639 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5640 // ElemLVal.reduce_fini = fini;
5641 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5642 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5643 llvm::Value *FiniAddr =
5644 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5645 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5646 // ElemLVal.reduce_comb = comb;
5647 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5648 llvm::Value *CombAddr = emitReduceCombFunction(
5649 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5650 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5651 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5652 // ElemLVal.flags = 0;
5653 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5654 if (DelayedCreation) {
5655 CGF.EmitStoreOfScalar(
5656 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5657 FlagsLVal);
5658 } else
5659 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5660 FlagsLVal.getType());
5662 if (Data.IsReductionWithTaskMod) {
5663 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5664 // is_ws, int num, void *data);
5665 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5666 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5667 CGM.IntTy, /*isSigned=*/true);
5668 llvm::Value *Args[] = {
5669 IdentTLoc, GTid,
5670 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5671 /*isSigned=*/true),
5672 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5673 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5674 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5675 return CGF.EmitRuntimeCall(
5676 OMPBuilder.getOrCreateRuntimeFunction(
5677 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5678 Args);
5680 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5681 llvm::Value *Args[] = {
5682 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5683 /*isSigned=*/true),
5684 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5685 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5686 CGM.VoidPtrTy)};
5687 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5688 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5689 Args);
5692 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5693 SourceLocation Loc,
5694 bool IsWorksharingReduction) {
5695   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
5696   // int gtid, int is_ws);
5697 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5698 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5699 CGM.IntTy, /*isSigned=*/true);
5700 llvm::Value *Args[] = {IdentTLoc, GTid,
5701 llvm::ConstantInt::get(CGM.IntTy,
5702 IsWorksharingReduction ? 1 : 0,
5703 /*isSigned=*/true)};
5704 (void)CGF.EmitRuntimeCall(
5705 OMPBuilder.getOrCreateRuntimeFunction(
5706 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5707 Args);
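// Usage sketch: this finalizer corresponds to reductions that use the
// 'task' modifier, e.g.
//   #pragma omp for reduction(task, + : sum)
// where the __kmpc_taskred_modifier_init call emitted at region entry is
// paired with the __kmpc_task_reduction_modifier_fini call above at exit.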
5710 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5711 SourceLocation Loc,
5712 ReductionCodeGen &RCG,
5713 unsigned N) {
5714 auto Sizes = RCG.getSizes(N);
5715   // Emit the threadprivate global variable if the size is non-constant
5716   // (Sizes.second != nullptr).
5717 if (Sizes.second) {
5718 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5719 /*isSigned=*/false);
5720 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5721 CGF, CGM.getContext().getSizeType(),
5722 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5723 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5727 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5728 SourceLocation Loc,
5729 llvm::Value *ReductionsPtr,
5730 LValue SharedLVal) {
5731 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5732 // *d);
5733 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5734 CGM.IntTy,
5735 /*isSigned=*/true),
5736 ReductionsPtr,
5737 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5738 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5739 return Address(
5740 CGF.EmitRuntimeCall(
5741 OMPBuilder.getOrCreateRuntimeFunction(
5742 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5743 Args),
5744 CGF.Int8Ty, SharedLVal.getAlignment());
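// Example (sketch, hypothetical names): for
//   #pragma omp task in_reduction(+ : x)
// the task body obtains its thread-specific copy of 'x' through the
// __kmpc_task_reduction_get_th_data call above, passing the taskgroup's
// reduction descriptor (ReductionsPtr) and the shared address of 'x'.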
5747 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5748 const OMPTaskDataTy &Data) {
5749 if (!CGF.HaveInsertPoint())
5750 return;
5752 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5753 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5754 OMPBuilder.createTaskwait(CGF.Builder);
5755 } else {
5756 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5757 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5758 auto &M = CGM.getModule();
5759 Address DependenciesArray = Address::invalid();
5760 llvm::Value *NumOfElements;
5761 std::tie(NumOfElements, DependenciesArray) =
5762 emitDependClause(CGF, Data.Dependences, Loc);
5763 if (!Data.Dependences.empty()) {
5764 llvm::Value *DepWaitTaskArgs[7];
5765 DepWaitTaskArgs[0] = UpLoc;
5766 DepWaitTaskArgs[1] = ThreadID;
5767 DepWaitTaskArgs[2] = NumOfElements;
5768 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5769 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5770 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5771 DepWaitTaskArgs[6] =
5772 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5774 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5776       // Build call void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5777       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5778       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5779       // kmp_int32 has_no_wait), used when dependence info is specified.
5780 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5781 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5782 DepWaitTaskArgs);
5784 } else {
5786 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5787 // global_tid);
5788 llvm::Value *Args[] = {UpLoc, ThreadID};
5789 // Ignore return result until untied tasks are supported.
5790 CGF.EmitRuntimeCall(
5791 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5792 Args);
5796 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5797 Region->emitUntiedSwitch(CGF);
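// Lowering sketch (non-IRBuilder path):
//   #pragma omp taskwait                -> __kmpc_omp_taskwait(loc, gtid)
//   #pragma omp taskwait depend(in: x)  -> __kmpc_omp_taskwait_deps_51(
//                                            loc, gtid, ndeps, deps,
//                                            0, null, has_nowait)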
5800 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5801 OpenMPDirectiveKind InnerKind,
5802 const RegionCodeGenTy &CodeGen,
5803 bool HasCancel) {
5804 if (!CGF.HaveInsertPoint())
5805 return;
5806 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5807 InnerKind != OMPD_critical &&
5808 InnerKind != OMPD_master &&
5809 InnerKind != OMPD_masked);
5810 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5813 namespace {
5814 enum RTCancelKind {
5815 CancelNoreq = 0,
5816 CancelParallel = 1,
5817 CancelLoop = 2,
5818 CancelSections = 3,
5819 CancelTaskgroup = 4
5821 } // anonymous namespace
5823 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5824 RTCancelKind CancelKind = CancelNoreq;
5825 if (CancelRegion == OMPD_parallel)
5826 CancelKind = CancelParallel;
5827 else if (CancelRegion == OMPD_for)
5828 CancelKind = CancelLoop;
5829 else if (CancelRegion == OMPD_sections)
5830 CancelKind = CancelSections;
5831 else {
5832 assert(CancelRegion == OMPD_taskgroup);
5833 CancelKind = CancelTaskgroup;
5835 return CancelKind;
5838 void CGOpenMPRuntime::emitCancellationPointCall(
5839 CodeGenFunction &CGF, SourceLocation Loc,
5840 OpenMPDirectiveKind CancelRegion) {
5841 if (!CGF.HaveInsertPoint())
5842 return;
5843 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5844 // global_tid, kmp_int32 cncl_kind);
5845 if (auto *OMPRegionInfo =
5846 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5847 // For 'cancellation point taskgroup', the task region info may not have a
5848 // cancel. This may instead happen in another adjacent task.
5849 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5850 llvm::Value *Args[] = {
5851 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5852 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5853 // Ignore return result until untied tasks are supported.
5854 llvm::Value *Result = CGF.EmitRuntimeCall(
5855 OMPBuilder.getOrCreateRuntimeFunction(
5856 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5857 Args);
5858 // if (__kmpc_cancellationpoint()) {
5859 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5860 // exit from construct;
5861 // }
5862 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5863 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5864 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5865 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5866 CGF.EmitBlock(ExitBB);
5867 if (CancelRegion == OMPD_parallel)
5868 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5869 // exit from construct;
5870 CodeGenFunction::JumpDest CancelDest =
5871 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5872 CGF.EmitBranchThroughCleanup(CancelDest);
5873 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
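// Rough shape of the emitted control flow, in C terms (a sketch, not
// actual output):
//   if (__kmpc_cancellationpoint(loc, gtid, cncl_kind) != 0) {
//     // cancellation barrier for 'parallel' only
//     branch through cleanups to the cancel destination;
//   }
//   // else fall through to .cancel.continue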
5878 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5879 const Expr *IfCond,
5880 OpenMPDirectiveKind CancelRegion) {
5881 if (!CGF.HaveInsertPoint())
5882 return;
5883 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5884 // kmp_int32 cncl_kind);
5885 auto &M = CGM.getModule();
5886 if (auto *OMPRegionInfo =
5887 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5888 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5889 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5890 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5891 llvm::Value *Args[] = {
5892 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5893 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5894 // Ignore return result until untied tasks are supported.
5895 llvm::Value *Result = CGF.EmitRuntimeCall(
5896 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5897 // if (__kmpc_cancel()) {
5898 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5899 // exit from construct;
5900 // }
5901 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5902 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5903 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5904 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5905 CGF.EmitBlock(ExitBB);
5906 if (CancelRegion == OMPD_parallel)
5907 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5908 // exit from construct;
5909 CodeGenFunction::JumpDest CancelDest =
5910 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5911 CGF.EmitBranchThroughCleanup(CancelDest);
5912 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5914 if (IfCond) {
5915 emitIfClause(CGF, IfCond, ThenGen,
5916 [](CodeGenFunction &, PrePostActionTy &) {});
5917 } else {
5918 RegionCodeGenTy ThenRCG(ThenGen);
5919 ThenRCG(CGF);
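// Example (sketch): for
//   #pragma omp cancel parallel if(cond)
// ThenGen above is emitted under the if-clause condition, so the
// __kmpc_cancel call only happens when 'cond' evaluates to true; otherwise
// the construct continues normally.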
5924 namespace {
5925 /// Cleanup action for uses_allocators support.
5926 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5927 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5929 public:
5930 OMPUsesAllocatorsActionTy(
5931 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5932 : Allocators(Allocators) {}
5933 void Enter(CodeGenFunction &CGF) override {
5934 if (!CGF.HaveInsertPoint())
5935 return;
5936 for (const auto &AllocatorData : Allocators) {
5937 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5938 CGF, AllocatorData.first, AllocatorData.second);
5941 void Exit(CodeGenFunction &CGF) override {
5942 if (!CGF.HaveInsertPoint())
5943 return;
5944 for (const auto &AllocatorData : Allocators) {
5945 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5946 AllocatorData.first);
5950 } // namespace
5952 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5953 const OMPExecutableDirective &D, StringRef ParentName,
5954 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5955 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5956 assert(!ParentName.empty() && "Invalid target entry parent name!");
5957 HasEmittedTargetRegion = true;
5958 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5959 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5960 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5961 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5962 if (!D.AllocatorTraits)
5963 continue;
5964 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5967 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5968 CodeGen.setAction(UsesAllocatorAction);
5969 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5970 IsOffloadEntry, CodeGen);
5973 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5974 const Expr *Allocator,
5975 const Expr *AllocatorTraits) {
5976 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5977 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5978 // Use default memspace handle.
5979 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5980 llvm::Value *NumTraits = llvm::ConstantInt::get(
5981 CGF.IntTy, cast<ConstantArrayType>(
5982 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5983 ->getSize()
5984 .getLimitedValue());
5985 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5986 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5987 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5988 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5989 AllocatorTraitsLVal.getBaseInfo(),
5990 AllocatorTraitsLVal.getTBAAInfo());
5991 llvm::Value *Traits = Addr.getPointer();
5993 llvm::Value *AllocatorVal =
5994 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5995 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5996 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5997 // Store to allocator.
5998 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5999 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6000 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6001 AllocatorVal =
6002 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6003 Allocator->getType(), Allocator->getExprLoc());
6004 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
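// Example (sketch; 'my_alloc' and 'traits' are hypothetical): for
//   omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
//   #pragma omp target uses_allocators(my_alloc(traits))
// 'my_alloc' is initialized on region entry via __kmpc_init_allocator with
// NumTraits = 1, and torn down on exit via __kmpc_destroy_allocator below.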
6007 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6008 const Expr *Allocator) {
6009 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6010 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6011 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6012 llvm::Value *AllocatorVal =
6013 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6014 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6015 CGF.getContext().VoidPtrTy,
6016 Allocator->getExprLoc());
6017 (void)CGF.EmitRuntimeCall(
6018 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6019 OMPRTL___kmpc_destroy_allocator),
6020 {ThreadId, AllocatorVal});
6023 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6024 const OMPExecutableDirective &D, StringRef ParentName,
6025 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6026 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6028 llvm::TargetRegionEntryInfo EntryInfo =
6029 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6031 CodeGenFunction CGF(CGM, true);
6032 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6033 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6034 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6036 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6037 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6038 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6041 // Get NumTeams and ThreadLimit attributes
6042 int32_t DefaultValTeams = -1;
6043 uint32_t DefaultValThreads = UINT32_MAX;
6044 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6045 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads,
6046 /*UpperBoundOnly=*/true);
6048 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
6049 DefaultValTeams, DefaultValThreads,
6050 IsOffloadEntry, OutlinedFn, OutlinedFnID);
6052 if (!OutlinedFn)
6053 return;
6055 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6057 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6058 for (auto *A : C->getAttrs()) {
6059 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6060 CGM.handleCUDALaunchBoundsAttr(OutlinedFn, Attr);
6061 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6062 CGM.handleAMDGPUFlatWorkGroupSizeAttr(OutlinedFn, Attr);
6063 else if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6064 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6065 else
6066 llvm_unreachable("Unexpected attribute kind");
6071 /// Checks if the expression is constant or does not have non-trivial function
6072 /// calls.
6073 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6074 // We can skip constant expressions.
6075 // We can skip expressions with trivial calls or simple expressions.
6076 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6077 !E->hasNonTrivialCall(Ctx)) &&
6078 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6081 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6082 const Stmt *Body) {
6083 const Stmt *Child = Body->IgnoreContainers();
6084 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6085 Child = nullptr;
6086 for (const Stmt *S : C->body()) {
6087 if (const auto *E = dyn_cast<Expr>(S)) {
6088 if (isTrivial(Ctx, E))
6089 continue;
6091 // Some of the statements can be ignored.
6092 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6093 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6094 continue;
6095 // Analyze declarations.
6096 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6097 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6098 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6099 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6100 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6101 isa<UsingDirectiveDecl>(D) ||
6102 isa<OMPDeclareReductionDecl>(D) ||
6103 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6104 return true;
6105 const auto *VD = dyn_cast<VarDecl>(D);
6106 if (!VD)
6107 return false;
6108 return VD->hasGlobalStorage() || !VD->isUsed();
6110 continue;
6112       // Found multiple children: cannot determine a single child.
6113 if (Child)
6114 return nullptr;
6115 Child = S;
6117 if (Child)
6118 Child = Child->IgnoreContainers();
6120 return Child;
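// Example (sketch): in
//   #pragma omp target
//   { ; int unused; #pragma omp teams ... }
// the null statement and the unused local are skipped, and the teams
// directive is returned as the single child; the num_teams/num_threads
// analyses below rely on this.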
6123 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6124 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6125 int32_t &DefaultVal) {
6127 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6128 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6129 "Expected target-based executable directive.");
6130 switch (DirectiveKind) {
6131 case OMPD_target: {
6132 const auto *CS = D.getInnermostCapturedStmt();
6133 const auto *Body =
6134 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6135 const Stmt *ChildStmt =
6136 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6137 if (const auto *NestedDir =
6138 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6139 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6140 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6141 const Expr *NumTeams =
6142 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6143 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6144 if (auto Constant =
6145 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6146 DefaultVal = Constant->getExtValue();
6147 return NumTeams;
6149 DefaultVal = 0;
6150 return nullptr;
6152 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6153 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6154 DefaultVal = 1;
6155 return nullptr;
6157 DefaultVal = 1;
6158 return nullptr;
6160     // A value of -1 signals that no enclosed teams region could be identified.
6161 DefaultVal = -1;
6162 return nullptr;
6164 case OMPD_target_teams_loop:
6165 case OMPD_target_teams:
6166 case OMPD_target_teams_distribute:
6167 case OMPD_target_teams_distribute_simd:
6168 case OMPD_target_teams_distribute_parallel_for:
6169 case OMPD_target_teams_distribute_parallel_for_simd: {
6170 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6171 const Expr *NumTeams =
6172 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6173 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6174 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6175 DefaultVal = Constant->getExtValue();
6176 return NumTeams;
6178 DefaultVal = 0;
6179 return nullptr;
6181 case OMPD_target_parallel:
6182 case OMPD_target_parallel_for:
6183 case OMPD_target_parallel_for_simd:
6184 case OMPD_target_parallel_loop:
6185 case OMPD_target_simd:
6186 DefaultVal = 1;
6187 return nullptr;
6188 case OMPD_parallel:
6189 case OMPD_for:
6190 case OMPD_parallel_for:
6191 case OMPD_parallel_loop:
6192 case OMPD_parallel_master:
6193 case OMPD_parallel_sections:
6194 case OMPD_for_simd:
6195 case OMPD_parallel_for_simd:
6196 case OMPD_cancel:
6197 case OMPD_cancellation_point:
6198 case OMPD_ordered:
6199 case OMPD_threadprivate:
6200 case OMPD_allocate:
6201 case OMPD_task:
6202 case OMPD_simd:
6203 case OMPD_tile:
6204 case OMPD_unroll:
6205 case OMPD_sections:
6206 case OMPD_section:
6207 case OMPD_single:
6208 case OMPD_master:
6209 case OMPD_critical:
6210 case OMPD_taskyield:
6211 case OMPD_barrier:
6212 case OMPD_taskwait:
6213 case OMPD_taskgroup:
6214 case OMPD_atomic:
6215 case OMPD_flush:
6216 case OMPD_depobj:
6217 case OMPD_scan:
6218 case OMPD_teams:
6219 case OMPD_target_data:
6220 case OMPD_target_exit_data:
6221 case OMPD_target_enter_data:
6222 case OMPD_distribute:
6223 case OMPD_distribute_simd:
6224 case OMPD_distribute_parallel_for:
6225 case OMPD_distribute_parallel_for_simd:
6226 case OMPD_teams_distribute:
6227 case OMPD_teams_distribute_simd:
6228 case OMPD_teams_distribute_parallel_for:
6229 case OMPD_teams_distribute_parallel_for_simd:
6230 case OMPD_target_update:
6231 case OMPD_declare_simd:
6232 case OMPD_declare_variant:
6233 case OMPD_begin_declare_variant:
6234 case OMPD_end_declare_variant:
6235 case OMPD_declare_target:
6236 case OMPD_end_declare_target:
6237 case OMPD_declare_reduction:
6238 case OMPD_declare_mapper:
6239 case OMPD_taskloop:
6240 case OMPD_taskloop_simd:
6241 case OMPD_master_taskloop:
6242 case OMPD_master_taskloop_simd:
6243 case OMPD_parallel_master_taskloop:
6244 case OMPD_parallel_master_taskloop_simd:
6245 case OMPD_requires:
6246 case OMPD_metadirective:
6247 case OMPD_unknown:
6248 break;
6249 default:
6250 break;
6252 llvm_unreachable("Unexpected directive kind.");
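// Worked examples (sketch):
//   #pragma omp target teams num_teams(8) -> returns the '8' expression,
//                                            DefaultVal = 8
//   #pragma omp target parallel           -> nullptr, DefaultVal = 1
//   #pragma omp target (opaque body)      -> nullptr, DefaultVal = -1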
6255 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6256 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6257 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6258 "Clauses associated with the teams directive expected to be emitted "
6259 "only for the host!");
6260 CGBuilderTy &Bld = CGF.Builder;
6261 int32_t DefaultNT = -1;
6262 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6263 if (NumTeams != nullptr) {
6264 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6266 switch (DirectiveKind) {
6267 case OMPD_target: {
6268 const auto *CS = D.getInnermostCapturedStmt();
6269 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6270 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6271 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6272 /*IgnoreResultAssign*/ true);
6273 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6274 /*isSigned=*/true);
6276 case OMPD_target_teams:
6277 case OMPD_target_teams_distribute:
6278 case OMPD_target_teams_distribute_simd:
6279 case OMPD_target_teams_distribute_parallel_for:
6280 case OMPD_target_teams_distribute_parallel_for_simd: {
6281 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6282 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6283 /*IgnoreResultAssign*/ true);
6284 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6285 /*isSigned=*/true);
6287 default:
6288 break;
6292 return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
6295 /// Check for a constant num_threads value (stored in \p UpperBound), or an
6296 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6297 /// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6298 /// corresponding expression evaluation is not performed.
6299 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6300 const Expr **E, uint32_t &UpperBound,
6301 bool UpperBoundOnly, llvm::Value **CondVal) {
6302 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6303 CGF.getContext(), CS->getCapturedStmt());
6304 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6305 if (!Dir)
6306 return;
6308 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6309     // Handle the if clause. If present, the number of threads is
6310     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6311 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6312 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6313 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6314 const OMPIfClause *IfClause = nullptr;
6315 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6316 if (C->getNameModifier() == OMPD_unknown ||
6317 C->getNameModifier() == OMPD_parallel) {
6318 IfClause = C;
6319 break;
6322 if (IfClause) {
6323 const Expr *CondExpr = IfClause->getCondition();
6324 bool Result;
6325 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6326 if (!Result) {
6327 UpperBound = 1;
6328 return;
6330 } else {
6331 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6332 if (const auto *PreInit =
6333 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6334 for (const auto *I : PreInit->decls()) {
6335 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6336 CGF.EmitVarDecl(cast<VarDecl>(*I));
6337 } else {
6338 CodeGenFunction::AutoVarEmission Emission =
6339 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6340 CGF.EmitAutoVarCleanups(Emission);
6343 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6348     // Check the value of the num_threads clause if the if clause was not
6349     // specified or does not evaluate to false.
6350 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6351 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6352 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6353 const auto *NumThreadsClause =
6354 Dir->getSingleClause<OMPNumThreadsClause>();
6355 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6356 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6357 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6358 UpperBound =
6359 UpperBound
6360 ? Constant->getZExtValue()
6361 : std::min(UpperBound,
6362 static_cast<uint32_t>(Constant->getZExtValue()));
6363       // If we haven't found an upper bound, remember that we saw a
6364       // thread-limiting clause.
6365 if (UpperBound == UINT32_MAX)
6366 UpperBound = 0;
6367 if (!E)
6368 return;
6369 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6370 if (const auto *PreInit =
6371 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6372 for (const auto *I : PreInit->decls()) {
6373 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6374 CGF.EmitVarDecl(cast<VarDecl>(*I));
6375 } else {
6376 CodeGenFunction::AutoVarEmission Emission =
6377 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6378 CGF.EmitAutoVarCleanups(Emission);
6382 *E = NTExpr;
6384 return;
6386 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6387 UpperBound = 1;
6388 return;
6391 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6392 CodeGenFunction &CGF, const OMPExecutableDirective &D, uint32_t &UpperBound,
6393 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6394 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6395 "Clauses associated with the teams directive expected to be emitted "
6396 "only for the host!");
6397 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6398 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6399 "Expected target-based executable directive.");
6401 const Expr *NT = nullptr;
6402 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6404 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6405 if (E->isIntegerConstantExpr(CGF.getContext())) {
6406 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6407 UpperBound = UpperBound ? Constant->getZExtValue()
6408 : std::min(UpperBound,
6409 uint32_t(Constant->getZExtValue()));
6411     // If we haven't found an upper bound, remember that we saw a
6412     // thread-limiting clause.
6413 if (UpperBound == UINT32_MAX)
6414 UpperBound = 0;
6415 if (EPtr)
6416 *EPtr = E;
6419 auto ReturnSequential = [&]() {
6420 UpperBound = 1;
6421 return NT;
6424 switch (DirectiveKind) {
6425 case OMPD_target: {
6426 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6427 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6428 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6429 CGF.getContext(), CS->getCapturedStmt());
6430     // TODO: The standard is not clear on how to resolve two thread_limit
6431     // clauses; pick the teams one if it's present, otherwise the target one.
6432 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6433 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6434 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6435 ThreadLimitClause = TLC;
6436 if (ThreadLimitExpr) {
6437 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6438 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6439 CodeGenFunction::LexicalScope Scope(
6440 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6441 if (const auto *PreInit =
6442 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6443 for (const auto *I : PreInit->decls()) {
6444 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6445 CGF.EmitVarDecl(cast<VarDecl>(*I));
6446 } else {
6447 CodeGenFunction::AutoVarEmission Emission =
6448 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6449 CGF.EmitAutoVarCleanups(Emission);
6456 if (ThreadLimitClause)
6457 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6458 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6459 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6460 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6461 CS = Dir->getInnermostCapturedStmt();
6462 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6463 CGF.getContext(), CS->getCapturedStmt());
6464 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6466 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6467 CS = Dir->getInnermostCapturedStmt();
6468 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6469 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6470 return ReturnSequential();
6472 return NT;
6474 case OMPD_target_teams: {
6475 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6476 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6477 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6478 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6480 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6481 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6482 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6483 CGF.getContext(), CS->getCapturedStmt());
6484 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6485 if (Dir->getDirectiveKind() == OMPD_distribute) {
6486 CS = Dir->getInnermostCapturedStmt();
6487 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6490 return NT;
6492 case OMPD_target_teams_distribute:
6493 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6494 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6495 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6496 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6498 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6499 UpperBoundOnly, CondVal);
6500 return NT;
6501 case OMPD_target_teams_loop:
6502 case OMPD_target_parallel_loop:
6503 case OMPD_target_parallel:
6504 case OMPD_target_parallel_for:
6505 case OMPD_target_parallel_for_simd:
6506 case OMPD_target_teams_distribute_parallel_for:
6507 case OMPD_target_teams_distribute_parallel_for_simd: {
6508 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6509 const OMPIfClause *IfClause = nullptr;
6510 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6511 if (C->getNameModifier() == OMPD_unknown ||
6512 C->getNameModifier() == OMPD_parallel) {
6513 IfClause = C;
6514 break;
6517 if (IfClause) {
6518 const Expr *Cond = IfClause->getCondition();
6519 bool Result;
6520 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6521 if (!Result)
6522 return ReturnSequential();
6523 } else {
6524 CodeGenFunction::RunCleanupsScope Scope(CGF);
6525 *CondVal = CGF.EvaluateExprAsBool(Cond);
6529 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6530 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6531 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6532 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6534 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6535 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6536 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6537 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6538 return NumThreadsClause->getNumThreads();
6540 return NT;
6542 case OMPD_target_teams_distribute_simd:
6543 case OMPD_target_simd:
6544 return ReturnSequential();
6545 default:
6546 break;
6548 llvm_unreachable("Unsupported directive kind.");
6551 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6552 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6553 llvm::Value *NumThreadsVal = nullptr;
6554 llvm::Value *CondVal = nullptr;
6555 llvm::Value *ThreadLimitVal = nullptr;
6556 const Expr *ThreadLimitExpr = nullptr;
6557 uint32_t UpperBound = -1;
6559 const Expr *NT = getNumThreadsExprForTargetDirective(
6560 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6561 &ThreadLimitExpr);
6563   // Thread limit expressions are used below; emit them.
6564 if (ThreadLimitExpr) {
6565 ThreadLimitVal =
6566 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6567 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6568 /*isSigned=*/false);
6571   // Generate the num threads expression.
6572 if (UpperBound == 1) {
6573 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6574 } else if (NT) {
6575 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6576 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6577 /*isSigned=*/false);
6578 } else if (ThreadLimitVal) {
6579     // If we do not have a num threads value but a thread limit, replace the
6580     // former with the latter. The thread limit expression was already handled.
6581 NumThreadsVal = ThreadLimitVal;
6582 ThreadLimitVal = nullptr;
6583 } else {
6584 // Default to "0" which means runtime choice.
6585 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6586 NumThreadsVal = CGF.Builder.getInt32(0);
6589   // Handle the if clause. If present, the number of threads is
6590   // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6591 if (CondVal) {
6592 CodeGenFunction::RunCleanupsScope Scope(CGF);
6593 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6594 CGF.Builder.getInt32(1));
6597   // If both the thread limit and the num threads expression were present,
6598   // take the minimum.
6599 if (ThreadLimitVal) {
6600 NumThreadsVal = CGF.Builder.CreateSelect(
6601 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6602 ThreadLimitVal, NumThreadsVal);
6605 return NumThreadsVal;
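// Worked example (sketch, hypothetical names): for
//   #pragma omp target parallel if(c) num_threads(n) thread_limit(t)
// the code above computes roughly:
//   nt = (uint32_t)n;
//   nt = c ? nt : 1;            // if-clause select
//   nt = (t < nt) ? t : nt;     // unsigned min with the thread limit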
6608 namespace {
6609 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6611 // Utility to handle information from clauses associated with a given
6612 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6613 // It provides a convenient interface to obtain the information and generate
6614 // code for that information.
6615 class MappableExprsHandler {
6616 public:
6617 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6618 static unsigned getFlagMemberOffset() {
6619 unsigned Offset = 0;
6620 for (uint64_t Remain =
6621 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6622 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6623 !(Remain & 1); Remain = Remain >> 1)
6624 Offset++;
6625 return Offset;
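  // Example (sketch): OMP_MAP_MEMBER_OF occupies the high 16 bits of the
  // 64-bit flag word, so this returns 48; a MEMBER_OF(N) annotation (N being
  // the 1-based position of the parent struct among the arguments) is then
  // encoded roughly as (uint64_t)N << 48 in the map flags.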
6628 /// Class that holds debugging information for a data mapping to be passed to
6629 /// the runtime library.
6630 class MappingExprInfo {
6631 /// The variable declaration used for the data mapping.
6632 const ValueDecl *MapDecl = nullptr;
6633 /// The original expression used in the map clause, or null if there is
6634 /// none.
6635 const Expr *MapExpr = nullptr;
6637 public:
6638 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6639 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6641 const ValueDecl *getMapDecl() const { return MapDecl; }
6642 const Expr *getMapExpr() const { return MapExpr; }
6645 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6646 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6647 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6648 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6649 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6650 using MapNonContiguousArrayTy =
6651 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6652 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6653 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6655 /// This structure contains combined information generated for mappable
6656 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6657 /// mappers, and non-contiguous information.
6658 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6659 MapExprsArrayTy Exprs;
6660 MapValueDeclsArrayTy Mappers;
6661 MapValueDeclsArrayTy DevicePtrDecls;
6663 /// Append arrays in \a CurInfo.
6664 void append(MapCombinedInfoTy &CurInfo) {
6665 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6666 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6667 CurInfo.DevicePtrDecls.end());
6668 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6669 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6673   /// Map between a struct and its lowest & highest elements which have been
6674 /// mapped.
6675 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6676 /// HE(FieldIndex, Pointer)}
6677 struct StructRangeInfoTy {
6678 MapCombinedInfoTy PreliminaryMapData;
6679 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6680 0, Address::invalid()};
6681 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6682 0, Address::invalid()};
6683 Address Base = Address::invalid();
6684 Address LB = Address::invalid();
6685 bool IsArraySection = false;
6686 bool HasCompleteRecord = false;
6689 private:
6690   /// Map information for a mappable expression, including whether a device
6690   /// pointer has to be returned for it.
6691 struct MapInfo {
6692 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6693 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6694 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6695 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6696 bool ReturnDevicePointer = false;
6697 bool IsImplicit = false;
6698 const ValueDecl *Mapper = nullptr;
6699 const Expr *VarRef = nullptr;
6700 bool ForDeviceAddr = false;
6702 MapInfo() = default;
6703 MapInfo(
6704 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6705 OpenMPMapClauseKind MapType,
6706 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6707 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6708 bool ReturnDevicePointer, bool IsImplicit,
6709 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6710 bool ForDeviceAddr = false)
6711 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6712 MotionModifiers(MotionModifiers),
6713 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6714 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6717 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6718 /// member and there is no map information about it, then emission of that
6719 /// entry is deferred until the whole struct has been processed.
6720 struct DeferredDevicePtrEntryTy {
6721 const Expr *IE = nullptr;
6722 const ValueDecl *VD = nullptr;
6723 bool ForDeviceAddr = false;
6725 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6726 bool ForDeviceAddr)
6727 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6730   /// The target directive from which the mappable clauses were extracted. It
6731   /// is either an executable directive or a user-defined mapper directive.
6732 llvm::PointerUnion<const OMPExecutableDirective *,
6733 const OMPDeclareMapperDecl *>
6734 CurDir;
6736 /// Function the directive is being generated for.
6737 CodeGenFunction &CGF;
6739   /// Set of all firstprivate variables in the current directive.
6740   /// The bool data is set to true if the variable is implicitly marked as
6741   /// firstprivate, false otherwise.
6742 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6744 /// Map between device pointer declarations and their expression components.
6745 /// The key value for declarations in 'this' is null.
6746 llvm::DenseMap<
6747 const ValueDecl *,
6748 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6749 DevPointersMap;
6751 /// Map between device addr declarations and their expression components.
6752 /// The key value for declarations in 'this' is null.
6753 llvm::DenseMap<
6754 const ValueDecl *,
6755 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6756 HasDevAddrsMap;
6758 /// Map between lambda declarations and their map type.
6759 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6761 llvm::Value *getExprTypeSize(const Expr *E) const {
6762 QualType ExprTy = E->getType().getCanonicalType();
6764 // Calculate the size for array shaping expression.
6765 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6766 llvm::Value *Size =
6767 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6768 for (const Expr *SE : OAE->getDimensions()) {
6769 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6770 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6771 CGF.getContext().getSizeType(),
6772 SE->getExprLoc());
6773 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6775 return Size;
6778 // Reference types are ignored for mapping purposes.
6779 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6780 ExprTy = RefTy->getPointeeType().getCanonicalType();
6782 // Given that an array section is considered a built-in type, we need to
6783 // do the calculation based on the length of the section instead of relying
6784 // on CGF.getTypeSize(E->getType()).
6785 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6786 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6787 OAE->getBase()->IgnoreParenImpCasts())
6788 .getCanonicalType();
6790       // If there is no length associated with the expression and the lower
6791       // bound is not specified either, that means we are using the whole
6792       // length of the base.
6793 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6794 !OAE->getLowerBound())
6795 return CGF.getTypeSize(BaseTy);
6797 llvm::Value *ElemSize;
6798 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6799 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6800 } else {
6801 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6802 assert(ATy && "Expecting array type if not a pointer type.");
6803 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6806 // If we don't have a length at this point, that is because we have an
6807 // array section with a single element.
6808 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6809 return ElemSize;
6811 if (const Expr *LenExpr = OAE->getLength()) {
6812 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6813 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6814 CGF.getContext().getSizeType(),
6815 LenExpr->getExprLoc());
6816 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6818 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6819 OAE->getLowerBound() && "expected array_section[lb:].");
6820       // Size = sizeof(base) - lb * sizeof(elem);
6821 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6822 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6823 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6824 CGF.getContext().getSizeType(),
6825 OAE->getLowerBound()->getExprLoc());
6826 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6827 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6828 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6829 LengthVal = CGF.Builder.CreateSelect(
6830 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6831 return LengthVal;
6833 return CGF.getTypeSize(ExprTy);
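  // Size computation examples (sketch), given 'double d[10];':
  //   d[1:5] -> 5 * sizeof(double)
  //   d[2:]  -> sizeof(d) - 2 * sizeof(double), clamped to >= 0
  //   d[:]   -> sizeof(d)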
6836 /// Return the corresponding bits for a given map clause modifier. Add
6837 /// a flag marking the map as a pointer if requested. Add a flag marking the
6838 /// map as the first one of a series of maps that relate to the same map
6839 /// expression.
6840 OpenMPOffloadMappingFlags getMapTypeBits(
6841 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6842 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6843 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6844 OpenMPOffloadMappingFlags Bits =
6845 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6846 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6847 switch (MapType) {
6848 case OMPC_MAP_alloc:
6849 case OMPC_MAP_release:
6850     // alloc and release are the default behavior in the runtime library; i.e.,
6851     // if we don't pass any bits, alloc/release is what the runtime is going
6852     // to do. Therefore, we don't need to signal anything for these two type
6853     // modifiers.
6854 break;
6855 case OMPC_MAP_to:
6856 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6857 break;
6858 case OMPC_MAP_from:
6859 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6860 break;
6861 case OMPC_MAP_tofrom:
6862 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6863 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6864 break;
6865 case OMPC_MAP_delete:
6866 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6867 break;
6868 case OMPC_MAP_unknown:
6869 llvm_unreachable("Unexpected map type!");
6871 if (AddPtrFlag)
6872 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6873 if (AddIsTargetParamFlag)
6874 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6875 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6876 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6877 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6878 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6879 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6880 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6881 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6882 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6883 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6884 if (IsNonContiguous)
6885 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6886 return Bits;
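  // Example (sketch): map(always, tofrom: x) on a first-of-capture entry
  // yields OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS, plus
  // OMP_MAP_TARGET_PARAM when AddIsTargetParamFlag is set.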
6889   /// Return true if the provided expression is a final array section. A
6890   /// final array section is one whose length can't be proven to be one.
6891 bool isFinalArraySectionExpression(const Expr *E) const {
6892 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6894 // It is not an array section and therefore not a unity-size one.
6895 if (!OASE)
6896 return false;
6898     // An array section with no colon always refers to a single element.
6899 if (OASE->getColonLocFirst().isInvalid())
6900 return false;
6902 const Expr *Length = OASE->getLength();
6904     // If we don't have a length, we have to check if the array has size 1
6905     // for this dimension. Also, we should always expect a length if the
6906     // base type is a pointer.
6907 if (!Length) {
6908 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6909 OASE->getBase()->IgnoreParenImpCasts())
6910 .getCanonicalType();
6911 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6912 return ATy->getSize().getSExtValue() != 1;
6913       // If we don't have a constant dimension length, we have to consider
6914       // the current section as having any size, so it is not necessarily
6915       // unitary. If it happens to have unity size, that's the user's fault.
6916 return true;
6919 // Check if the length evaluates to 1.
6920 Expr::EvalResult Result;
6921 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6922       return true; // Can have a size greater than 1.
6924 llvm::APSInt ConstLength = Result.Val.getInt();
6925 return ConstLength.getSExtValue() != 1;
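  // Examples (sketch), given 'int a[8];':
  //   a[2:6] -> final (length 6 != 1)
  //   a[2:1] -> not final (constant length 1)
  //   a[2]   -> not final (not a section; a single element)
  //   a[0:]  -> final (constant dimension 8 != 1)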
6928 /// Generate the base pointers, section pointers, sizes, map type bits, and
6929 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6930 /// map type, map or motion modifiers, and expression components.
6931   /// \a IsFirstComponentList should be set to true if the provided set of
6932 /// components is the first associated with a capture.
6933 void generateInfoForComponentList(
6934 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6935 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6936 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6937 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
6938 bool IsFirstComponentList, bool IsImplicit,
6939 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6940 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6941 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6942 OverlappedElements = std::nullopt) const {
6943 // The following summarizes what has to be generated for each map and the
6944 // types below. The generated information is expressed in this order:
6945 // base pointer, section pointer, size, flags
6946 // (to add to the ones that come from the map type and modifier).
6948 // double d;
6949 // int i[100];
6950 // float *p;
6951 // int **a = &i;
6953 // struct S1 {
6954 // int i;
6955 // float f[50];
6956 // }
6957 // struct S2 {
6958 // int i;
6959 // float f[50];
6960 // S1 s;
6961 // double *p;
6962 // struct S2 *ps;
6963 // int &ref;
6964 // }
6965 // S2 s;
6966 // S2 *ps;
6968 // map(d)
6969 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6971 // map(i)
6972 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6974 // map(i[1:23])
6975 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6977 // map(p)
6978 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6980 // map(p[1:24])
6981 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6982 // in unified shared memory mode or for local pointers
6983 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6985 // map((*a)[0:3])
6986 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6987 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6989 // map(**a)
6990 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6991 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6993 // map(s)
6994 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6996 // map(s.i)
6997 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6999 // map(s.s.f)
7000 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7002 // map(s.p)
7003 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7005 // map(to: s.p[:22])
7006 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7007 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7008 // &(s.p), &(s.p[0]), 22*sizeof(double),
7009 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7010 // (*) alloc space for struct members, only this is a target parameter
7011 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7012 // optimizes this entry out, same in the examples below)
7013 // (***) map the pointee (map: to)
7015 // map(to: s.ref)
7016 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7017 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7018 // (*) alloc space for struct members, only this is a target parameter
7019 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7020 // optimizes this entry out, same in the examples below)
7021 // (***) map the pointee (map: to)
7023 // map(s.ps)
7024 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7026 // map(from: s.ps->s.i)
7027 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7028 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7029 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7031 // map(to: s.ps->ps)
7032 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7033 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7034 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7036 // map(s.ps->ps->ps)
7037 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7038 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7039 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7040 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7042 // map(to: s.ps->ps->s.f[:22])
7043 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7044 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7045 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7046 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7048 // map(ps)
7049 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7051 // map(ps->i)
7052 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7054 // map(ps->s.f)
7055 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7057 // map(from: ps->p)
7058 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7060 // map(to: ps->p[:22])
7061 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7062 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7063 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7065 // map(ps->ps)
7066 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7068 // map(from: ps->ps->s.i)
7069 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7070 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7071 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7073 // map(from: ps->ps->ps)
7074 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7075 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7076 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7078 // map(ps->ps->ps->ps)
7079 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7080 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7081 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7082 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7084 // map(to: ps->ps->ps->s.f[:22])
7085 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7086 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7087 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7088 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7090 // map(to: s.f[:22]) map(from: s.p[:33])
7091 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7092 // sizeof(double*) (**), TARGET_PARAM
7093 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7094 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7095 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7096 // (**) allocate contiguous space needed to fit all mapped members even
7097 //      though this means allocating space for members that are not mapped
7098 //      (here, s.f[22..49] and s.s are not mapped, yet we must allocate
7099 //      space for them as well because they fall between &s.f[0] and &s.p)
7101 // map(from: s.f[:22]) map(to: ps->p[:33])
7102 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7103 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7104 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7105 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7106 // (*) the struct this entry pertains to is the 2nd element in the list of
7107 // arguments, hence MEMBER_OF(2)
7109 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7110 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7111 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7112 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7113 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7114 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7115 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7116 // (*) the struct this entry pertains to is the 4th element in the list
7117 // of arguments, hence MEMBER_OF(4)
7119 // Track if the map information being generated is the first for a capture.
7120 bool IsCaptureFirstInfo = IsFirstComponentList;
7121 // When the variable is on a declare target link or in a to clause with
7122 // unified memory, a reference is needed to hold the host/device address
7123 // of the variable.
7124 bool RequiresReference = false;
7126 // Scan the components from the base to the complete expression.
7127 auto CI = Components.rbegin();
7128 auto CE = Components.rend();
7129 auto I = CI;
7131 // Track if the map information being generated is the first for a list of
7132 // components.
7133 bool IsExpressionFirstInfo = true;
7134 bool FirstPointerInComplexData = false;
7135 Address BP = Address::invalid();
7136 const Expr *AssocExpr = I->getAssociatedExpression();
7137 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7138 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7139 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7141 if (isa<MemberExpr>(AssocExpr)) {
7142 // The base is the 'this' pointer. The content of the pointer is going
7143 // to be the base of the field being mapped.
7144 BP = CGF.LoadCXXThisAddress();
7145 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7146 (OASE &&
7147 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7148 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7149 } else if (OAShE &&
7150 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7151 BP = Address(
7152 CGF.EmitScalarExpr(OAShE->getBase()),
7153 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7154 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7155 } else {
7156 // The base is the reference to the variable.
7157 // BP = &Var.
7158 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7159 if (const auto *VD =
7160 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7161 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7162 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7163 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7164 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7165 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7166 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7167 RequiresReference = true;
7168 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
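// A hedged illustration (hypothetical user code, not part of this file):
//   int g;
//   #pragma omp declare target link(g)
// Mapping 'g' then goes through the runtime-managed reference returned by
// getAddrOfDeclareTargetVar(), so BP points at that reference and
// RequiresReference is set; declare target to/enter variables take the same
// path under unified shared memory.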
7173 // If the variable is a pointer and is being dereferenced (i.e. is not
7174 // the last component), the base has to be the pointer itself, not its
7175 // reference. References are ignored for mapping purposes.
7176 QualType Ty =
7177 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7178 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7179 // No need to generate individual map information for the pointer; it
7180 // can be associated with the combined storage if shared memory mode is
7181 // active or the base declaration is not a global variable.
7182 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7183 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7184 !VD || VD->hasLocalStorage())
7185 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7186 else
7187 FirstPointerInComplexData = true;
7188 ++I;
7192 // Track whether a component of the list should be marked as MEMBER_OF some
7193 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7194 // in a component list should be marked as MEMBER_OF; all subsequent entries
7195 // do not belong to the base struct. E.g.
7196 // struct S2 s;
7197 // s.ps->ps->ps->f[:]
7198 // (1) (2) (3) (4)
7199 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7200 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7201 // is the pointee of ps(2), which is not a member of struct s, so it should
7202 // not be marked as such (it is still PTR_AND_OBJ).
7203 // The variable is initialized to false so that PTR_AND_OBJ entries which
7204 // are not struct members are not considered (e.g. an array of pointers to
7205 // data).
7206 bool ShouldBeMemberOf = false;
7208 // Variable keeping track of whether or not we have encountered a component
7209 // in the component list which is a member expression. Useful when we have a
7210 // pointer or a final array section, in which case it is the previous
7211 // component in the list which tells us whether we have a member expression.
7212 // E.g. X.f[:]
7213 // While processing the final array section "[:]" it is "f" which tells us
7214 // whether we are dealing with a member of a declared struct.
7215 const MemberExpr *EncounteredME = nullptr;
7217 // Track the total number of dimensions. Start from one for the dummy
7218 // dimension.
7219 uint64_t DimSize = 1;
7221 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7222 bool IsPrevMemberReference = false;
7224 for (; I != CE; ++I) {
7225 // If the current component is member of a struct (parent struct) mark it.
7226 if (!EncounteredME) {
7227 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7228 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7229 // as MEMBER_OF the parent struct.
7230 if (EncounteredME) {
7231 ShouldBeMemberOf = true;
7232 // Do not emit as a complex pointer if this is actually not an array-like
7233 // expression.
7234 if (FirstPointerInComplexData) {
7235 QualType Ty = std::prev(I)
7236 ->getAssociatedDeclaration()
7237 ->getType()
7238 .getNonReferenceType();
7239 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7240 FirstPointerInComplexData = false;
7245 auto Next = std::next(I);
7247 // We need to generate the addresses and sizes if this is the last
7248 // component, if the component is a pointer, or if it is an array section
7249 // whose length can't be proved to be one. If this is a pointer, it
7250 // becomes the base address for the following components.
7252 // A final array section is one whose length can't be proved to be one.
7253 // If the map item is non-contiguous then we don't treat any array section
7254 // as a final array section.
7255 bool IsFinalArraySection =
7256 !IsNonContiguous &&
7257 isFinalArraySectionExpression(I->getAssociatedExpression());
7259 // If we have a declaration for the mapping, use that; otherwise use
7260 // the base declaration of the map clause.
7261 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7262 ? I->getAssociatedDeclaration()
7263 : BaseDecl;
7264 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7265 : MapExpr;
7267 // Get information on whether the element is a pointer. Array sections
7268 // need special treatment given that they are built-in
7269 // types.
7270 const auto *OASE =
7271 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7272 const auto *OAShE =
7273 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7274 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7275 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7276 bool IsPointer =
7277 OAShE ||
7278 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7279 .getCanonicalType()
7280 ->isAnyPointerType()) ||
7281 I->getAssociatedExpression()->getType()->isAnyPointerType();
7282 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7283 MapDecl &&
7284 MapDecl->getType()->isLValueReferenceType();
7285 bool IsNonDerefPointer = IsPointer &&
7286 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7287 !IsNonContiguous;
7289 if (OASE)
7290 ++DimSize;
7292 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7293 IsFinalArraySection) {
7294 // If this is not the last component, we expect the pointer to be
7295 // associated with an array expression or member expression.
7296 assert((Next == CE ||
7297 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7298 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7299 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7300 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7301 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7302 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7303 "Unexpected expression");
7305 Address LB = Address::invalid();
7306 Address LowestElem = Address::invalid();
7307 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7308 const MemberExpr *E) {
7309 const Expr *BaseExpr = E->getBase();
7310 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7311 // scalar.
7312 LValue BaseLV;
7313 if (E->isArrow()) {
7314 LValueBaseInfo BaseInfo;
7315 TBAAAccessInfo TBAAInfo;
7316 Address Addr =
7317 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7318 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7319 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7320 } else {
7321 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7323 return BaseLV;
7325 if (OAShE) {
7326 LowestElem = LB =
7327 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7328 CGF.ConvertTypeForMem(
7329 OAShE->getBase()->getType()->getPointeeType()),
7330 CGF.getContext().getTypeAlignInChars(
7331 OAShE->getBase()->getType()));
7332 } else if (IsMemberReference) {
7333 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7334 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7335 LowestElem = CGF.EmitLValueForFieldInitialization(
7336 BaseLVal, cast<FieldDecl>(MapDecl))
7337 .getAddress(CGF);
7338 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7339 .getAddress(CGF);
7340 } else {
7341 LowestElem = LB =
7342 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7343 .getAddress(CGF);
7346 // If this component is a pointer inside the base struct then we don't
7347 // need to create any entry for it - it will be combined with the object
7348 // it is pointing to into a single PTR_AND_OBJ entry.
7349 bool IsMemberPointerOrAddr =
7350 EncounteredME &&
7351 (((IsPointer || ForDeviceAddr) &&
7352 I->getAssociatedExpression() == EncounteredME) ||
7353 (IsPrevMemberReference && !IsPointer) ||
7354 (IsMemberReference && Next != CE &&
7355 !Next->getAssociatedExpression()->getType()->isPointerType()));
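// A sketch of what this catches (reusing the example types above): for
//   struct S2 s;
//   #pragma omp target map(to: s.p[:22])
// the component for 's.p' itself is a pointer inside the base struct, so no
// standalone entry is emitted for it; it is folded into the
// MEMBER_OF(1) | PTR_AND_OBJ entry that maps the pointee s.p[0..21].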
7356 if (!OverlappedElements.empty() && Next == CE) {
7357 // Handle base element with the info for overlapped elements.
7358 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7359 assert(!IsPointer &&
7360 "Unexpected base element with the pointer type.");
7361 // Mark the whole struct as the struct that requires allocation on the
7362 // device.
7363 PartialStruct.LowestElem = {0, LowestElem};
7364 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7365 I->getAssociatedExpression()->getType());
7366 Address HB = CGF.Builder.CreateConstGEP(
7367 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7368 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7369 TypeSize.getQuantity() - 1);
7370 PartialStruct.HighestElem = {
7371 std::numeric_limits<decltype(
7372 PartialStruct.HighestElem.first)>::max(),
7373 HB};
7374 PartialStruct.Base = BP;
7375 PartialStruct.LB = LB;
7376 assert(
7377 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7378 "Overlapped elements must be used only once for the variable.");
7379 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7380 // Emit entries for the non-overlapped data.
7381 OpenMPOffloadMappingFlags Flags =
7382 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7383 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7384 /*AddPtrFlag=*/false,
7385 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7386 llvm::Value *Size = nullptr;
7387 // Do bitcopy of all non-overlapped structure elements.
7388 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7389 Component : OverlappedElements) {
7390 Address ComponentLB = Address::invalid();
7391 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7392 Component) {
7393 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7394 const auto *FD = dyn_cast<FieldDecl>(VD);
7395 if (FD && FD->getType()->isLValueReferenceType()) {
7396 const auto *ME =
7397 cast<MemberExpr>(MC.getAssociatedExpression());
7398 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7399 ComponentLB =
7400 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7401 .getAddress(CGF);
7402 } else {
7403 ComponentLB =
7404 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7405 .getAddress(CGF);
7407 Size = CGF.Builder.CreatePtrDiff(
7408 CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
7409 break;
7412 assert(Size && "Failed to determine structure size");
7413 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7414 CombinedInfo.BasePointers.push_back(BP.getPointer());
7415 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7416 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7417 CombinedInfo.Pointers.push_back(LB.getPointer());
7418 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7419 Size, CGF.Int64Ty, /*isSigned=*/true));
7420 CombinedInfo.Types.push_back(Flags);
7421 CombinedInfo.Mappers.push_back(nullptr);
7422 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7423 : 1);
7424 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7426 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7427 CombinedInfo.BasePointers.push_back(BP.getPointer());
7428 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7429 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7430 CombinedInfo.Pointers.push_back(LB.getPointer());
7431 Size = CGF.Builder.CreatePtrDiff(
7432 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7433 LB.getPointer());
7434 CombinedInfo.Sizes.push_back(
7435 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7436 CombinedInfo.Types.push_back(Flags);
7437 CombinedInfo.Mappers.push_back(nullptr);
7438 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7439 : 1);
7440 break;
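// A sketch of the overlapped case (hypothetical user code): with
//   #pragma omp target map(tofrom: s) map(to: s.s.f[:22])
// the map of the whole of 's' overlaps the more specific map of 's.s.f'.
// The loop above emits plain bitcopy entries for the byte ranges of 's'
// around each overlapped element, and the overlapped range itself is left
// to its own entry with its own map type bits.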
7442 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7443 if (!IsMemberPointerOrAddr ||
7444 (Next == CE && MapType != OMPC_MAP_unknown)) {
7445 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7446 CombinedInfo.BasePointers.push_back(BP.getPointer());
7447 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7448 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7449 CombinedInfo.Pointers.push_back(LB.getPointer());
7450 CombinedInfo.Sizes.push_back(
7451 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7452 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7453 : 1);
7455 // If Mapper is valid, the last component inherits the mapper.
7456 bool HasMapper = Mapper && Next == CE;
7457 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7459 // We need to add a pointer flag for each map that comes from the
7460 // same expression except for the first one. We also need to signal
7461 // this map is the first one that relates with the current capture
7462 // (there is a set of entries for each capture).
7463 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7464 MapType, MapModifiers, MotionModifiers, IsImplicit,
7465 !IsExpressionFirstInfo || RequiresReference ||
7466 FirstPointerInComplexData || IsMemberReference,
7467 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7469 if (!IsExpressionFirstInfo || IsMemberReference) {
7470 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7471 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7472 if (IsPointer || (IsMemberReference && Next != CE))
7473 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7474 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7475 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7476 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7477 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7479 if (ShouldBeMemberOf) {
7480 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7481 // should be later updated with the correct value of MEMBER_OF.
7482 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7483 // From now on, all subsequent PTR_AND_OBJ entries should not be
7484 // marked as MEMBER_OF.
7485 ShouldBeMemberOf = false;
7489 CombinedInfo.Types.push_back(Flags);
7492 // If we have encountered a member expression so far, keep track of the
7493 // mapped member. If the parent is "*this", then the value declaration
7494 // is nullptr.
7495 if (EncounteredME) {
7496 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7497 unsigned FieldIndex = FD->getFieldIndex();
7499 // Update info about the lowest and highest elements for this struct
7500 if (!PartialStruct.Base.isValid()) {
7501 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7502 if (IsFinalArraySection) {
7503 Address HB =
7504 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7505 .getAddress(CGF);
7506 PartialStruct.HighestElem = {FieldIndex, HB};
7507 } else {
7508 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7510 PartialStruct.Base = BP;
7511 PartialStruct.LB = BP;
7512 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7513 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7514 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7515 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7519 // Need to emit combined struct for array sections.
7520 if (IsFinalArraySection || IsNonContiguous)
7521 PartialStruct.IsArraySection = true;
7523 // If we have a final array section, we are done with this expression.
7524 if (IsFinalArraySection)
7525 break;
7527 // The pointer becomes the base for the next element.
7528 if (Next != CE)
7529 BP = IsMemberReference ? LowestElem : LB;
7531 IsExpressionFirstInfo = false;
7532 IsCaptureFirstInfo = false;
7533 FirstPointerInComplexData = false;
7534 IsPrevMemberReference = IsMemberReference;
7535 } else if (FirstPointerInComplexData) {
7536 QualType Ty = Components.rbegin()
7537 ->getAssociatedDeclaration()
7538 ->getType()
7539 .getNonReferenceType();
7540 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7541 FirstPointerInComplexData = false;
7544 // If we ran through the whole component list without encountering a member
7545 // expression, allocate space for the complete record.
7546 if (!EncounteredME)
7547 PartialStruct.HasCompleteRecord = true;
7549 if (!IsNonContiguous)
7550 return;
7552 const ASTContext &Context = CGF.getContext();
7554 // To support strides in array sections, we need to initialize the first
7555 // dimension size as 1, the first offset as 0, and the first count as 1.
7556 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7557 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7558 MapValuesArrayTy CurStrides;
7559 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7560 uint64_t ElementTypeSize;
7562 // Collect size information for each dimension and get the element size as
7563 // the first stride. For example, for `int arr[10][10]`, the DimSizes
7564 // should be [10, 10] and the first stride is 4 bytes.
7565 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7566 Components) {
7567 const Expr *AssocExpr = Component.getAssociatedExpression();
7568 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7570 if (!OASE)
7571 continue;
7573 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7574 auto *CAT = Context.getAsConstantArrayType(Ty);
7575 auto *VAT = Context.getAsVariableArrayType(Ty);
7577 // We need all the dimension sizes except for the last dimension.
7578 assert((VAT || CAT || &Component == &*Components.begin()) &&
7579 "Should be either ConstantArray or VariableArray if not the "
7580 "first Component");
7582 // Get element size if CurStrides is empty.
7583 if (CurStrides.empty()) {
7584 const Type *ElementType = nullptr;
7585 if (CAT)
7586 ElementType = CAT->getElementType().getTypePtr();
7587 else if (VAT)
7588 ElementType = VAT->getElementType().getTypePtr();
7589 else
7590 assert(&Component == &*Components.begin() &&
7591 "Only expect pointer (non CAT or VAT) when this is the "
7592 "first Component");
7593 // If ElementType is null, then it means the base is a pointer
7594 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7595 // on the next iteration.
7596 if (ElementType) {
7597 // For the case of having a pointer as the base, we need to remove one
7598 // level of indirection.
7599 if (&Component != &*Components.begin())
7600 ElementType = ElementType->getPointeeOrArrayElementType();
7601 ElementTypeSize =
7602 Context.getTypeSizeInChars(ElementType).getQuantity();
7603 CurStrides.push_back(
7604 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7607 // Get the dimension value, except for the last dimension, since we
7608 // don't need it.
7609 if (DimSizes.size() < Components.size() - 1) {
7610 if (CAT)
7611 DimSizes.push_back(llvm::ConstantInt::get(
7612 CGF.Int64Ty, CAT->getSize().getZExtValue()));
7613 else if (VAT)
7614 DimSizes.push_back(CGF.Builder.CreateIntCast(
7615 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7616 /*IsSigned=*/false));
7620 // Skip the dummy dimension since we already have its information.
7621 auto *DI = DimSizes.begin() + 1;
7622 // Running product of dimension sizes.
7623 llvm::Value *DimProd =
7624 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7626 // Collect info for non-contiguous sections. Notice that offset, count, and
7627 // stride are only meaningful for an array section, so we insert a null for
7628 // anything other than an array section.
7629 // Also, the sizes of offset, count, and stride are not the same as those of
7630 // pointers, base_pointers, sizes, or dims. Instead, the sizes of offset,
7631 // count, and stride match the number of non-contiguous declarations in the
7632 // target update to/from clause.
7633 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7634 Components) {
7635 const Expr *AssocExpr = Component.getAssociatedExpression();
7637 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7638 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7639 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7640 /*isSigned=*/false);
7641 CurOffsets.push_back(Offset);
7642 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7643 CurStrides.push_back(CurStrides.back());
7644 continue;
7647 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7649 if (!OASE)
7650 continue;
7652 // Offset
7653 const Expr *OffsetExpr = OASE->getLowerBound();
7654 llvm::Value *Offset = nullptr;
7655 if (!OffsetExpr) {
7656 // If offset is absent, then we just set it to zero.
7657 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7658 } else {
7659 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7660 CGF.Int64Ty,
7661 /*isSigned=*/false);
7663 CurOffsets.push_back(Offset);
7665 // Count
7666 const Expr *CountExpr = OASE->getLength();
7667 llvm::Value *Count = nullptr;
7668 if (!CountExpr) {
7669 // In Clang, once a higher dimension is an array section, all the lower
7670 // dimensions are constructed as array sections too. However, for a case
7671 // like arr[0:2][2], Clang constructs the inner dimension as an array
7672 // section even though per the spec it is not actually one.
7673 if (!OASE->getColonLocFirst().isValid() &&
7674 !OASE->getColonLocSecond().isValid()) {
7675 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7676 } else {
7677 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7678 // When the length is absent it defaults to ⌈(size −
7679 // lower-bound)/stride⌉, where size is the size of the array
7680 // dimension.
7681 const Expr *StrideExpr = OASE->getStride();
7682 llvm::Value *Stride =
7683 StrideExpr
7684 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7685 CGF.Int64Ty, /*isSigned=*/false)
7686 : nullptr;
7687 if (Stride)
7688 Count = CGF.Builder.CreateUDiv(
7689 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7690 else
7691 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7693 } else {
7694 Count = CGF.EmitScalarExpr(CountExpr);
7696 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7697 CurCounts.push_back(Count);
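// Worked example (assumed values): for a section arr[2:] over a dimension
// of size 9 with no stride, the length is absent, so the count defaults to
// 9 - 2 = 7, i.e. the spec formula above with a stride of 1.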
7699 // Stride_n' = Stride_n * (D_0 * D_1 * ... * D_{n-1}) * UnitSize
7700 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7701 // Offset Count Stride
7702 // D0 0 1 4 (int) <- dummy dimension
7703 // D1 0 2 8 (2 * (1) * 4)
7704 // D2 1 2 20 (1 * (1 * 5) * 4)
7705 // D3 0 2 200 (2 * (1 * 5 * 5) * 4)
7706 const Expr *StrideExpr = OASE->getStride();
7707 llvm::Value *Stride =
7708 StrideExpr
7709 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7710 CGF.Int64Ty, /*isSigned=*/false)
7711 : nullptr;
7712 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7713 if (Stride)
7714 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7715 else
7716 CurStrides.push_back(DimProd);
7717 if (DI != DimSizes.end())
7718 ++DI;
7721 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7722 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7723 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
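// A minimal user-level sketch (hypothetical code, not part of this file):
//   int arr[10][10];
//   #pragma omp target update to(arr[0:2][0:5:2])
// The section is non-contiguous, so instead of a single contiguous size the
// runtime receives the per-dimension offsets, counts, and strides collected
// above.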
7726 /// Return the adjusted map modifiers if the declaration a capture refers to
7727 /// appears in a firstprivate clause. This is expected to be used only with
7728 /// directives that start with 'target'.
7729 OpenMPOffloadMappingFlags
7730 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7731 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7733 // A firstprivate variable captured by reference will use only the
7734 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7735 // declaration is known to be firstprivate in this handler.
7736 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7737 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7738 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7739 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7740 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7741 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7743 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7744 if (I != LambdasMap.end())
7745 // For map(to: lambda): use the user-specified map type.
7746 return getMapTypeBits(
7747 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7748 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7749 /*AddPtrFlag=*/false,
7750 /*AddIsTargetParamFlag=*/false,
7751 /*isNonContiguous=*/false);
7752 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7753 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
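// E.g. (hypothetical captures): for
//   int x; float *p;
//   #pragma omp target firstprivate(x, p)
// 'x' yields PRIVATE | TO and 'p' yields TO | PTR_AND_OBJ per the logic
// above, while a captured lambda uses its user-specified map type and
// anything else falls through to the default TO | FROM.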
7756 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7757 // Rotate by getFlagMemberOffset() bits.
7758 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7759 << getFlagMemberOffset());
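// For example, assuming getFlagMemberOffset() is 48 (i.e. MEMBER_OF occupies
// the top 16 bits of the 64-bit flag word), getMemberOfFlag(0) yields
// MEMBER_OF(1) == 1ULL << 48, matching the MEMBER_OF(n) notation used in
// the comments above.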
7762 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7763 OpenMPOffloadMappingFlags MemberOfFlag) {
7764 // If the entry is PTR_AND_OBJ but has not been marked with the special
7765 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7766 // marked as MEMBER_OF.
7767 if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7768 Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
7769 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7770 (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
7771 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
7772 return;
7774 // Reset the placeholder value to prepare the flag for the assignment of the
7775 // proper MEMBER_OF value.
7776 Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7777 Flags |= MemberOfFlag;
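// Sketch of the intended use: an entry whose flags are
//   PTR_AND_OBJ | MEMBER_OF   (the 0xFFFF placeholder)
// becomes PTR_AND_OBJ | MEMBER_OF(n) after
//   setCorrectMemberOfFlag(Flags, getMemberOfFlag(n - 1));
// a PTR_AND_OBJ entry without the placeholder is left untouched.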
7780 void getPlainLayout(const CXXRecordDecl *RD,
7781 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7782 bool AsBase) const {
7783 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7785 llvm::StructType *St =
7786 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7788 unsigned NumElements = St->getNumElements();
7789 llvm::SmallVector<
7790 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7791 RecordLayout(NumElements);
7793 // Fill bases.
7794 for (const auto &I : RD->bases()) {
7795 if (I.isVirtual())
7796 continue;
7797 const auto *Base = I.getType()->getAsCXXRecordDecl();
7798 // Ignore empty bases.
7799 if (Base->isEmpty() || CGF.getContext()
7800 .getASTRecordLayout(Base)
7801 .getNonVirtualSize()
7802 .isZero())
7803 continue;
7805 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7806 RecordLayout[FieldIndex] = Base;
7808 // Fill in virtual bases.
7809 for (const auto &I : RD->vbases()) {
7810 const auto *Base = I.getType()->getAsCXXRecordDecl();
7811 // Ignore empty bases.
7812 if (Base->isEmpty())
7813 continue;
7814 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7815 if (RecordLayout[FieldIndex])
7816 continue;
7817 RecordLayout[FieldIndex] = Base;
7819 // Fill in all the fields.
7820 assert(!RD->isUnion() && "Unexpected union.");
7821 for (const auto *Field : RD->fields()) {
7822 // Fill in the non-bitfield, non-zero-size fields. (Bitfields and
7823 // zero-size fields do not participate in the plain layout.)
7824 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7825 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7826 RecordLayout[FieldIndex] = Field;
7829 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7830 &Data : RecordLayout) {
7831 if (Data.isNull())
7832 continue;
7833 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7834 getPlainLayout(Base, Layout, /*AsBase=*/true);
7835 else
7836 Layout.push_back(Data.get<const FieldDecl *>());
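// For illustration (hypothetical types): given
//   struct A { int a; };
//   struct B : A { int b; };
// getPlainLayout(B, Layout, /*AsBase=*/false) appends A::a (recursing into
// the base subobject) followed by B::b, i.e. the fields in memory order
// with base classes flattened.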
7840 /// Generate all the base pointers, section pointers, sizes, map types, and
7841 /// mappers for the extracted mappable expressions (all included in \a
7842 /// CombinedInfo). Also, for each item that relates with a device pointer, a
7843 /// pair of the relevant declaration and index where it occurs is appended to
7844 /// the device pointers info array.
7845 void generateAllInfoForClauses(
7846 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7847 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7848 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7849 // We have to process the component lists that relate with the same
7850 // declaration in a single chunk so that we can generate the map flags
7851 // correctly. Therefore, we organize all lists in a map.
7852 enum MapKind { Present, Allocs, Other, Total };
7853 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7854 SmallVector<SmallVector<MapInfo, 8>, 4>>
7855 Info;
7857 // Helper function to fill the information map for the different supported
7858 // clauses.
7859 auto &&InfoGen =
7860 [&Info, &SkipVarSet](
7861 const ValueDecl *D, MapKind Kind,
7862 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7863 OpenMPMapClauseKind MapType,
7864 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7865 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7866 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7867 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7868 if (SkipVarSet.contains(D))
7869 return;
7870 auto It = Info.find(D);
7871 if (It == Info.end())
7872 It = Info
7873 .insert(std::make_pair(
7874 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7875 .first;
7876 It->second[Kind].emplace_back(
7877 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7878 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7881 for (const auto *Cl : Clauses) {
7882 const auto *C = dyn_cast<OMPMapClause>(Cl);
7883 if (!C)
7884 continue;
7885 MapKind Kind = Other;
7886 if (llvm::is_contained(C->getMapTypeModifiers(),
7887 OMPC_MAP_MODIFIER_present))
7888 Kind = Present;
7889 else if (C->getMapType() == OMPC_MAP_alloc)
7890 Kind = Allocs;
7891 const auto *EI = C->getVarRefs().begin();
7892 for (const auto L : C->component_lists()) {
7893 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7894 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7895 C->getMapTypeModifiers(), std::nullopt,
7896 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7898 ++EI;
7901 for (const auto *Cl : Clauses) {
7902 const auto *C = dyn_cast<OMPToClause>(Cl);
7903 if (!C)
7904 continue;
7905 MapKind Kind = Other;
7906 if (llvm::is_contained(C->getMotionModifiers(),
7907 OMPC_MOTION_MODIFIER_present))
7908 Kind = Present;
7909 const auto *EI = C->getVarRefs().begin();
7910 for (const auto L : C->component_lists()) {
7911 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7912 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7913 C->isImplicit(), std::get<2>(L), *EI);
7914 ++EI;
7917 for (const auto *Cl : Clauses) {
7918 const auto *C = dyn_cast<OMPFromClause>(Cl);
7919 if (!C)
7920 continue;
7921 MapKind Kind = Other;
7922 if (llvm::is_contained(C->getMotionModifiers(),
7923 OMPC_MOTION_MODIFIER_present))
7924 Kind = Present;
7925 const auto *EI = C->getVarRefs().begin();
7926 for (const auto L : C->component_lists()) {
7927 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7928 std::nullopt, C->getMotionModifiers(),
7929 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7930 *EI);
7931 ++EI;
7935 // Look at the use_device_ptr and use_device_addr clauses information and
7936 // mark the existing map entries as such. If there is no map information for
7937 // an entry in the use_device_ptr and use_device_addr list, we create one
7938 // with map type 'alloc' and a zero-size section. It is the user's fault if
7939 // that was not mapped before. If there is no map information and the
7940 // pointer is a struct member, then we defer the emission of that entry
7941 // until the whole struct has been processed.
7942 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7943 SmallVector<DeferredDevicePtrEntryTy, 4>>
7944 DeferredInfo;
7945 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7947 auto &&UseDeviceDataCombinedInfoGen =
7948 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7949 CodeGenFunction &CGF, bool IsDevAddr) {
7950 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7951 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7952 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7953 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7954 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7955 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7956 UseDeviceDataCombinedInfo.Sizes.push_back(
7957 llvm::Constant::getNullValue(CGF.Int64Ty));
7958 UseDeviceDataCombinedInfo.Types.push_back(
7959 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7960 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
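// E.g. (hypothetical user code): for
//   float *p;
//   #pragma omp target data map(to: p[0:10]) use_device_ptr(p)
// this helper emits a zero-size RETURN_PARAM entry for 'p' so that the
// region body observes the translated device address.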
7963 auto &&MapInfoGen =
7964 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7965 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7966 OMPClauseMappableExprCommon::MappableExprComponentListRef
7967 Components,
7968 bool IsImplicit, bool IsDevAddr) {
7969 // We didn't find any match in our map information - generate a zero
7970 // size array section. If the pointer is a struct member, we defer
7971 // this action until the whole struct has been processed.
7972 if (isa<MemberExpr>(IE)) {
7973 // Insert the pointer into Info to be processed by
7974 // generateInfoForComponentList. Because it is a member pointer
7975 // without a pointee, no entry will be generated for it, therefore
7976 // we need to generate one after the whole struct has been
7977 // processed. Nonetheless, generateInfoForComponentList must be
7978 // called to take the pointer into account for the calculation of
7979 // the range of the partial struct.
7980 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7981 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7982 nullptr, nullptr, IsDevAddr);
7983 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7984 } else {
7985 llvm::Value *Ptr;
7986 if (IsDevAddr) {
7987 if (IE->isGLValue())
7988 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7989 else
7990 Ptr = CGF.EmitScalarExpr(IE);
7991 } else {
7992 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7994 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7998 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7999 const Expr *IE, bool IsDevAddr) -> bool {
8000 // We potentially have map information for this declaration already.
8001 // Look for the first set of components that refer to it. If found,
8002 // return true.
8003 // If the first component is a member expression, we have to look into
8004 // 'this', which maps to null in the map of map information. Otherwise
8005 // look directly for the information.
8006 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8007 if (It != Info.end()) {
8008 bool Found = false;
8009 for (auto &Data : It->second) {
8010 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8011 return MI.Components.back().getAssociatedDeclaration() == VD;
8013 // If we found a map entry, signal that the pointer has to be
8014 // returned and move on to the next declaration. Exclude cases where
8015 // the base pointer is mapped as array subscript, array section or
8016 // array shaping. The base address is passed as a pointer to base in
8017 // this case and cannot be used as a base for use_device_ptr list
8018 // item.
8019 if (CI != Data.end()) {
8020 if (IsDevAddr) {
8021 CI->ForDeviceAddr = IsDevAddr;
8022 CI->ReturnDevicePointer = true;
8023 Found = true;
8024 break;
8025 } else {
8026 auto PrevCI = std::next(CI->Components.rbegin());
8027 const auto *VarD = dyn_cast<VarDecl>(VD);
8028 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8029 isa<MemberExpr>(IE) ||
8030 !VD->getType().getNonReferenceType()->isPointerType() ||
8031 PrevCI == CI->Components.rend() ||
8032 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8033 VarD->hasLocalStorage()) {
8034 CI->ForDeviceAddr = IsDevAddr;
8035 CI->ReturnDevicePointer = true;
8036 Found = true;
8037 break;
8042 return Found;
8044 return false;
8047 // Look at the use_device_ptr clause information and mark the existing map
8048 // entries as such. If there is no map information for an entry in the
8049 // use_device_ptr list, we create one with map type 'alloc' and a zero-size
8050 // section. It is the user's fault if that was not mapped before. If there
8051 // is no map information and the pointer is a struct member, then we defer
8052 // the emission of that entry until the whole struct has been processed.
8053 for (const auto *Cl : Clauses) {
8054 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8055 if (!C)
8056 continue;
8057 for (const auto L : C->component_lists()) {
8058 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8059 std::get<1>(L);
8060 assert(!Components.empty() &&
8061 "Not expecting empty list of components!");
8062 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8063 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8064 const Expr *IE = Components.back().getAssociatedExpression();
8065 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8066 continue;
8067 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8068 /*IsDevAddr=*/false);
8072 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8073 for (const auto *Cl : Clauses) {
8074 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8075 if (!C)
8076 continue;
8077 for (const auto L : C->component_lists()) {
8078 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8079 std::get<1>(L);
8080 assert(!std::get<1>(L).empty() &&
8081 "Not expecting empty list of components!");
8082 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8083 if (!Processed.insert(VD).second)
8084 continue;
8085 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8086 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8087 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8088 continue;
8089 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8090 /*IsDevAddr=*/true);
8094 for (const auto &Data : Info) {
8095 StructRangeInfoTy PartialStruct;
8096 // Temporary generated information.
8097 MapCombinedInfoTy CurInfo;
8098 const Decl *D = Data.first;
8099 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8100 for (const auto &M : Data.second) {
8101 for (const MapInfo &L : M) {
8102 assert(!L.Components.empty() &&
8103 "Not expecting declaration with no component lists.");
8105 // Remember the current base pointer index.
8106 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8107 CurInfo.NonContigInfo.IsNonContiguous =
8108 L.Components.back().isNonContiguous();
8109 generateInfoForComponentList(
8110 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8111 CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8112 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8114 // If this entry relates with a device pointer, set the relevant
8115 // declaration and add the 'return pointer' flag.
8116 if (L.ReturnDevicePointer) {
8117 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8118 "Unexpected number of mapped base pointers.");
8120 const ValueDecl *RelevantVD =
8121 L.Components.back().getAssociatedDeclaration();
8122 assert(RelevantVD &&
8123 "No relevant declaration related with device pointer??");
8125 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8126 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8127 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer;
8128 CurInfo.Types[CurrentBasePointersIdx] |=
8129 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8134 // Append any pending zero-length pointers which are struct members and
8135 // used with use_device_ptr or use_device_addr.
8136 auto CI = DeferredInfo.find(Data.first);
8137 if (CI != DeferredInfo.end()) {
8138 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8139 llvm::Value *BasePtr;
8140 llvm::Value *Ptr;
8141 if (L.ForDeviceAddr) {
8142 if (L.IE->isGLValue())
8143 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8144 else
8145 Ptr = this->CGF.EmitScalarExpr(L.IE);
8146 BasePtr = Ptr;
8147 // Entry is RETURN_PARAM. Also, set the placeholder value
8148 // MEMBER_OF=FFFF so that the entry is later updated with the
8149 // correct value of MEMBER_OF.
8150 CurInfo.Types.push_back(
8151 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8152 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8153 } else {
8154 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8155 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8156 L.IE->getExprLoc());
8157 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8158 // placeholder value MEMBER_OF=FFFF so that the entry is later
8159 // updated with the correct value of MEMBER_OF.
8160 CurInfo.Types.push_back(
8161 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8162 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8163 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8165 CurInfo.Exprs.push_back(L.VD);
8166 CurInfo.BasePointers.emplace_back(BasePtr);
8167 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8168 CurInfo.DevicePointers.emplace_back(
8169 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8170 CurInfo.Pointers.push_back(Ptr);
8171 CurInfo.Sizes.push_back(
8172 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8173 CurInfo.Mappers.push_back(nullptr);
8176 // If there is an entry in PartialStruct it means we have a struct with
8177 // individual members mapped. Emit an extra combined entry.
8178 if (PartialStruct.Base.isValid()) {
8179 CurInfo.NonContigInfo.Dims.push_back(0);
8180 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
8181 /*IsMapThis*/ !VD, VD);
8184 // We need to append the results of this capture to what we already
8185 // have.
8186 CombinedInfo.append(CurInfo);
8188 // Append data for use_device_ptr clauses.
8189 CombinedInfo.append(UseDeviceDataCombinedInfo);
8192 public:
8193 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8194 : CurDir(&Dir), CGF(CGF) {
8195 // Extract firstprivate clause information.
8196 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8197 for (const auto *D : C->varlists())
8198 FirstPrivateDecls.try_emplace(
8199 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8200 // Extract implicit firstprivates from uses_allocators clauses.
8201 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8202 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8203 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8204 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8205 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8206 /*Implicit=*/true);
8207 else if (const auto *VD = dyn_cast<VarDecl>(
8208 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8209 ->getDecl()))
8210 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8213 // Extract device pointer clause information.
8214 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8215 for (auto L : C->component_lists())
8216 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8217 // Extract device addr clause information.
8218 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8219 for (auto L : C->component_lists())
8220 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8221 // Extract map information.
8222 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8223 if (C->getMapType() != OMPC_MAP_to)
8224 continue;
8225 for (auto L : C->component_lists()) {
8226 const ValueDecl *VD = std::get<0>(L);
8227 const auto *RD = VD ? VD->getType()
8228 .getCanonicalType()
8229 .getNonReferenceType()
8230 ->getAsCXXRecordDecl()
8231 : nullptr;
8232 if (RD && RD->isLambda())
8233 LambdasMap.try_emplace(std::get<0>(L), C);
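// E.g. (hypothetical user code): for
//   auto l = [&x]() { return x; };
//   #pragma omp target map(to: l)
// 'l' is recorded in LambdasMap so that its captures inherit the
// user-specified map type (see getMapModifiersForPrivateClauses above).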
8238 /// Constructor for the declare mapper directive.
8239 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8240 : CurDir(&Dir), CGF(CGF) {}
8242 /// Generate code for the combined entry if we have a partially mapped struct
8243 /// and take care of the mapping flags of the arguments corresponding to
8244 /// individual struct members.
8245 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8246 MapFlagsArrayTy &CurTypes,
8247 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8248 const ValueDecl *VD = nullptr,
8249 bool NotTargetParams = true) const {
8250 if (CurTypes.size() == 1 &&
8251 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8252 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8253 !PartialStruct.IsArraySection)
8254 return;
8255 Address LBAddr = PartialStruct.LowestElem.second;
8256 Address HBAddr = PartialStruct.HighestElem.second;
8257 if (PartialStruct.HasCompleteRecord) {
8258 LBAddr = PartialStruct.LB;
8259 HBAddr = PartialStruct.LB;
8261 CombinedInfo.Exprs.push_back(VD);
8262 // Base is the base address of the struct.
8263 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8264 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8265 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8266 // Pointer is the address of the lowest element
8267 llvm::Value *LB = LBAddr.getPointer();
8268 const CXXMethodDecl *MD =
8269 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8270 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8271 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8272 // There should not be a mapper for a combined entry.
8273 if (HasBaseClass) {
8274 // OpenMP 5.2 148:21:
8275 // If the target construct is within a class non-static member function,
8276 // and a variable is an accessible data member of the object for which the
8277 // non-static member function is invoked, the variable is treated as
8278 // if the this[:1] expression had appeared in a map clause with a map-type
8279 // of tofrom.
8280 // Emit this[:1]
8281 CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8282 QualType Ty = MD->getThisType()->getPointeeType();
8283 llvm::Value *Size =
8284 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8285 /*isSigned=*/true);
8286 CombinedInfo.Sizes.push_back(Size);
8287 } else {
8288 CombinedInfo.Pointers.push_back(LB);
8289 // Size is (addr of {highest+1} element) - (addr of lowest element)
8290 llvm::Value *HB = HBAddr.getPointer();
8291 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8292 HBAddr.getElementType(), HB, /*Idx0=*/1);
8293 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8294 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8295 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8296 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8297 /*isSigned=*/false);
8298 CombinedInfo.Sizes.push_back(Size);
8300 CombinedInfo.Mappers.push_back(nullptr);
8301 // Map type is always TARGET_PARAM when generating info for captures.
8302 CombinedInfo.Types.push_back(
8303 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8304 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8305 // If any element has the present modifier, then make sure the runtime
8306 // doesn't attempt to allocate the struct.
8307 if (CurTypes.end() !=
8308 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8309 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8310 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8312 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8313 // Remove TARGET_PARAM flag from the first element
8314 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8315 // If any element has the ompx_hold modifier, then make sure the runtime
8316 // uses the hold reference count for the struct as a whole so that it won't
8317 // be unmapped by an extra dynamic reference count decrement. Add it to all
8318 // elements as well so the runtime knows which reference count to check
8319 // when determining whether it's time for device-to-host transfers of
8320 // individual elements.
8321 if (CurTypes.end() !=
8322 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8323 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8324 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8325 })) {
8326 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8327 for (auto &M : CurTypes)
8328 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8331 // All other current entries will be MEMBER_OF the combined entry
8332 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8333 // 0xFFFF in the MEMBER_OF field).
8334 OpenMPOffloadMappingFlags MemberOfFlag =
8335 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8336 for (auto &M : CurTypes)
8337 setCorrectMemberOfFlag(M, MemberOfFlag);
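// Tying this back to the earlier examples: for
//   map(to: s.f[:22]) map(from: s.p[:33])
// the combined entry emitted here covers the range from &s.f[0] to one past
// the highest mapped member, and each member entry's MEMBER_OF placeholder
// is rewritten to reference the combined entry's position in the argument
// list.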
8340 /// Generate all the base pointers, section pointers, sizes, map types, and
8341 /// mappers for the extracted mappable expressions (all included in \a
8342 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8343 /// pair of the relevant declaration and index where it occurs is appended to
8344 /// the device pointers info array.
8345 void generateAllInfo(
8346 MapCombinedInfoTy &CombinedInfo,
8347 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8348 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8349 assert(CurDir.is<const OMPExecutableDirective *>() &&
8350 "Expect a executable directive");
8351 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8352 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8355 /// Generate all the base pointers, section pointers, sizes, map types, and
8356 /// mappers for the extracted map clauses of user-defined mapper (all included
8357 /// in \a CombinedInfo).
8358 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8359 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8360 "Expect a declare mapper directive");
8361 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8362 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8365 /// Emit capture info for lambdas for variables captured by reference.
8366 void generateInfoForLambdaCaptures(
8367 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8368 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8369 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8370 const auto *RD = VDType->getAsCXXRecordDecl();
8371 if (!RD || !RD->isLambda())
8372 return;
8373 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8374 CGF.getContext().getDeclAlign(VD));
8375 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8376 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8377 FieldDecl *ThisCapture = nullptr;
8378 RD->getCaptureFields(Captures, ThisCapture);
8379 if (ThisCapture) {
8380 LValue ThisLVal =
8381 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8382 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8383 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8384 VDLVal.getPointer(CGF));
8385 CombinedInfo.Exprs.push_back(VD);
8386 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8387 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8388 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8389 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8390 CombinedInfo.Sizes.push_back(
8391 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8392 CGF.Int64Ty, /*isSigned=*/true));
8393 CombinedInfo.Types.push_back(
8394 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8395 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8396 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8397 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8398 CombinedInfo.Mappers.push_back(nullptr);
8400 for (const LambdaCapture &LC : RD->captures()) {
8401 if (!LC.capturesVariable())
8402 continue;
8403 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8404 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8405 continue;
8406 auto It = Captures.find(VD);
8407 assert(It != Captures.end() && "Found lambda capture without field.");
8408 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8409 if (LC.getCaptureKind() == LCK_ByRef) {
8410 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8411 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8412 VDLVal.getPointer(CGF));
8413 CombinedInfo.Exprs.push_back(VD);
8414 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8415 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8416 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8417 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8418 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8419 CGF.getTypeSize(
8420 VD->getType().getCanonicalType().getNonReferenceType()),
8421 CGF.Int64Ty, /*isSigned=*/true));
8422 } else {
8423 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8424 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8425 VDLVal.getPointer(CGF));
8426 CombinedInfo.Exprs.push_back(VD);
8427 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8428 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8429 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8430 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8431 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8433 CombinedInfo.Types.push_back(
8434 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8435 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8436 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8437 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8438 CombinedInfo.Mappers.push_back(nullptr);
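// Illustrative example of the entries generated above: given
// \code
// int x = 0;
// auto L = [&x]() { return x; };
// #pragma omp target map(to : L)
// { L(); }
// \endcode
// the by-reference capture of 'x' produces a PTR_AND_OBJ | MEMBER_OF entry
// whose base is the capture field inside the lambda object and whose pointee
// is 'x' itself, so the device copy of the lambda is rewired to the device
// copy of 'x'.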
8442 /// Set correct indices for lambdas captures.
8443 void adjustMemberOfForLambdaCaptures(
8444 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8445 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8446 MapFlagsArrayTy &Types) const {
8447 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8448 // Set correct member_of idx for all implicit lambda captures.
8449 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8450 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8451 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8452 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8453 continue;
8454 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8455 assert(BasePtr && "Unable to find base lambda address.");
8456 int TgtIdx = -1;
8457 for (unsigned J = I; J > 0; --J) {
8458 unsigned Idx = J - 1;
8459 if (Pointers[Idx] != BasePtr)
8460 continue;
8461 TgtIdx = Idx;
8462 break;
8464 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8465 // All other current entries will be MEMBER_OF the combined entry
8466 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8467 // 0xFFFF in the MEMBER_OF field).
8468 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8469 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8473 /// Generate the base pointers, section pointers, sizes, map types, and
8474 /// mappers associated with a given capture (all included in \a CombinedInfo).
8475 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8476 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8477 StructRangeInfoTy &PartialStruct) const {
8478 assert(!Cap->capturesVariableArrayType() &&
8479 "Not expecting to generate map info for a variable array type!");
8481 // We need to know when we are generating information for the first component.
8482 const ValueDecl *VD = Cap->capturesThis()
8483 ? nullptr
8484 : Cap->getCapturedVar()->getCanonicalDecl();
8486 // For map(to: lambda): skip here; it is processed in
8487 // generateDefaultMapInfo.
8488 if (LambdasMap.count(VD))
8489 return;
8491 // If this declaration appears in an is_device_ptr clause, we just have to
8492 // pass the pointer by value. If it is a reference to a declaration, we just
8493 // pass its value.
8494 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8495 CombinedInfo.Exprs.push_back(VD);
8496 CombinedInfo.BasePointers.emplace_back(Arg);
8497 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8498 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8499 CombinedInfo.Pointers.push_back(Arg);
8500 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8501 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8502 /*isSigned=*/true));
8503 CombinedInfo.Types.push_back(
8504 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8505 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8506 CombinedInfo.Mappers.push_back(nullptr);
8507 return;
8510 using MapData =
8511 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8512 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8513 const ValueDecl *, const Expr *>;
8514 SmallVector<MapData, 4> DeclComponentLists;
8515 // For member-field lists in an is_device_ptr clause, store them in
8516 // DeclComponentLists for generating component info.
8517 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8518 auto It = DevPointersMap.find(VD);
8519 if (It != DevPointersMap.end())
8520 for (const auto &MCL : It->second)
8521 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8522 /*IsImplicit=*/true, nullptr,
8523 nullptr);
8524 auto I = HasDevAddrsMap.find(VD);
8525 if (I != HasDevAddrsMap.end())
8526 for (const auto &MCL : I->second)
8527 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8528 /*IsImplicit=*/true, nullptr,
8529 nullptr);
8530 assert(CurDir.is<const OMPExecutableDirective *>() &&
8531 "Expect an executable directive");
8532 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8533 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8534 const auto *EI = C->getVarRefs().begin();
8535 for (const auto L : C->decl_component_lists(VD)) {
8536 const ValueDecl *VDecl, *Mapper;
8537 // The expression is not valid if the mapping is implicit.
8538 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8539 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8540 std::tie(VDecl, Components, Mapper) = L;
8541 assert(VDecl == VD && "We got information for the wrong declaration??");
8542 assert(!Components.empty() &&
8543 "Not expecting declaration with no component lists.");
8544 DeclComponentLists.emplace_back(Components, C->getMapType(),
8545 C->getMapTypeModifiers(),
8546 C->isImplicit(), Mapper, E);
8547 ++EI;
8550 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8551 const MapData &RHS) {
8552 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8553 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8554 bool HasPresent =
8555 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8556 bool HasAllocs = MapType == OMPC_MAP_alloc;
8557 MapModifiers = std::get<2>(RHS);
8558 MapType = std::get<1>(LHS);
8559 bool HasPresentR =
8560 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8561 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8562 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
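// Sorting sketch (illustrative): given map(present, to : s) and
// map(tofrom : s), the 'present'-modified list sorts to the front so the
// combined entry inherits the PRESENT semantics; 'alloc' maps are hoisted
// the same way.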
8565 // Find overlapping elements (including the offset from the base element).
8566 llvm::SmallDenseMap<
8567 const MapData *,
8568 llvm::SmallVector<
8569 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8571 4> OverlappedData;
8572 size_t Count = 0;
8573 for (const MapData &L : DeclComponentLists) {
8574 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8575 OpenMPMapClauseKind MapType;
8576 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8577 bool IsImplicit;
8578 const ValueDecl *Mapper;
8579 const Expr *VarRef;
8580 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = L;
8582 ++Count;
8583 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8584 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8585 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8586 VarRef) = L1;
8587 auto CI = Components.rbegin();
8588 auto CE = Components.rend();
8589 auto SI = Components1.rbegin();
8590 auto SE = Components1.rend();
8591 for (; CI != CE && SI != SE; ++CI, ++SI) {
8592 if (CI->getAssociatedExpression()->getStmtClass() !=
8593 SI->getAssociatedExpression()->getStmtClass())
8594 break;
8595 // Are we dealing with different variables/fields?
8596 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8597 break;
8599 // We have found an overlap if, for at least one of the component lists, we
8600 // reached the head of the list.
8601 if (CI == CE || SI == SE) {
8602 // Ignore it if it is the same component.
8603 if (CI == CE && SI == SE)
8604 continue;
8605 const auto It = (SI == SE) ? CI : SI;
8606 // If one component is a pointer and the other is some kind of
8607 // dereference of that pointer (array subscript, section, dereference,
8608 // etc.), it is not an overlap.
8609 // Similarly, if one component is a base and the other component is a
8610 // dereferenced pointer MemberExpr with the same base.
8611 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8612 (std::prev(It)->getAssociatedDeclaration() &&
8613 std::prev(It)
8614 ->getAssociatedDeclaration()
8615 ->getType()
8616 ->isPointerType()) ||
8617 (It->getAssociatedDeclaration() &&
8618 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8619 std::next(It) != CE && std::next(It) != SE))
8620 continue;
8621 const MapData &BaseData = CI == CE ? L : L1;
8622 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8623 SI == SE ? Components : Components1;
8624 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8625 OverlappedElements.getSecond().push_back(SubData);
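// Overlap sketch (illustrative): for map(tofrom : s) and map(to : s.x), both
// component lists reach the common base 's', so the list for 's.x' is
// recorded as an overlapped element of the map of 's', and the later
// per-component emission maps 's' around 's.x' rather than mapping the same
// bytes twice.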
8629 // Sort the overlapped elements for each item.
8630 llvm::SmallVector<const FieldDecl *, 4> Layout;
8631 if (!OverlappedData.empty()) {
8632 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8633 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8634 while (BaseType != OrigType) {
8635 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8636 OrigType = BaseType->getPointeeOrArrayElementType();
8639 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8640 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8641 else {
8642 const auto *RD = BaseType->getAsRecordDecl();
8643 Layout.append(RD->field_begin(), RD->field_end());
8646 for (auto &Pair : OverlappedData) {
8647 llvm::stable_sort(
8648 Pair.getSecond(),
8649 [&Layout](
8650 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8651 OMPClauseMappableExprCommon::MappableExprComponentListRef
8652 Second) {
8653 auto CI = First.rbegin();
8654 auto CE = First.rend();
8655 auto SI = Second.rbegin();
8656 auto SE = Second.rend();
8657 for (; CI != CE && SI != SE; ++CI, ++SI) {
8658 if (CI->getAssociatedExpression()->getStmtClass() !=
8659 SI->getAssociatedExpression()->getStmtClass())
8660 break;
8661 // Are we dealing with different variables/fields?
8662 if (CI->getAssociatedDeclaration() !=
8663 SI->getAssociatedDeclaration())
8664 break;
8667 // Lists contain the same elements.
8668 if (CI == CE && SI == SE)
8669 return false;
8671 // A list with fewer elements is less than a list with more elements.
8672 if (CI == CE || SI == SE)
8673 return CI == CE;
8675 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8676 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8677 if (FD1->getParent() == FD2->getParent())
8678 return FD1->getFieldIndex() < FD2->getFieldIndex();
8679 const auto *It =
8680 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8681 return FD == FD1 || FD == FD2;
8683 return *It == FD1;
8687 // The mapping flags depend on the capture this is associated with.
8688 // First go through all of the elements that have overlapped elements.
8689 bool IsFirstComponentList = true;
8690 for (const auto &Pair : OverlappedData) {
8691 const MapData &L = *Pair.getFirst();
8692 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8693 OpenMPMapClauseKind MapType;
8694 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8695 bool IsImplicit;
8696 const ValueDecl *Mapper;
8697 const Expr *VarRef;
8698 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = L;
8700 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8701 OverlappedComponents = Pair.getSecond();
8702 generateInfoForComponentList(
8703 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8704 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8705 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8706 IsFirstComponentList = false;
8708 // Go through other elements without overlapped elements.
8709 for (const MapData &L : DeclComponentLists) {
8710 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8711 OpenMPMapClauseKind MapType;
8712 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8713 bool IsImplicit;
8714 const ValueDecl *Mapper;
8715 const Expr *VarRef;
8716 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = L;
8718 auto It = OverlappedData.find(&L);
8719 if (It == OverlappedData.end())
8720 generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
8721 Components, CombinedInfo, PartialStruct,
8722 IsFirstComponentList, IsImplicit, Mapper,
8723 /*ForDeviceAddr=*/false, VD, VarRef);
8724 IsFirstComponentList = false;
8728 /// Generate the default map information for a given capture \a CI,
8729 /// record field declaration \a RI and captured value \a CV.
8730 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8731 const FieldDecl &RI, llvm::Value *CV,
8732 MapCombinedInfoTy &CombinedInfo) const {
8733 bool IsImplicit = true;
8734 // Do the default mapping.
8735 if (CI.capturesThis()) {
8736 CombinedInfo.Exprs.push_back(nullptr);
8737 CombinedInfo.BasePointers.push_back(CV);
8738 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8739 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8740 CombinedInfo.Pointers.push_back(CV);
8741 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8742 CombinedInfo.Sizes.push_back(
8743 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8744 CGF.Int64Ty, /*isSigned=*/true));
8745 // Default map type.
8746 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8747 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8748 } else if (CI.capturesVariableByCopy()) {
8749 const VarDecl *VD = CI.getCapturedVar();
8750 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8751 CombinedInfo.BasePointers.push_back(CV);
8752 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8753 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8754 CombinedInfo.Pointers.push_back(CV);
8755 if (!RI.getType()->isAnyPointerType()) {
8756 // We have to signal to the runtime that this capture is passed by value
8757 // and is not a pointer.
8758 CombinedInfo.Types.push_back(
8759 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8760 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8761 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8762 } else {
8763 // Pointers are implicitly mapped with a zero size and no flags
8764 // (other than first map that is added for all implicit maps).
8765 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8766 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8768 auto I = FirstPrivateDecls.find(VD);
8769 if (I != FirstPrivateDecls.end())
8770 IsImplicit = I->getSecond();
8771 } else {
8772 assert(CI.capturesVariable() && "Expected captured reference.");
8773 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8774 QualType ElementType = PtrTy->getPointeeType();
8775 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8776 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8777 // The default map type for a scalar/complex type is 'to' because by
8778 // default the value doesn't have to be retrieved. For an aggregate
8779 // type, the default is 'tofrom'.
8780 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8781 const VarDecl *VD = CI.getCapturedVar();
8782 auto I = FirstPrivateDecls.find(VD);
8783 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8784 CombinedInfo.BasePointers.push_back(CV);
8785 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8786 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8787 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8788 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8789 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8790 AlignmentSource::Decl));
8791 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8792 } else {
8793 CombinedInfo.Pointers.push_back(CV);
8795 if (I != FirstPrivateDecls.end())
8796 IsImplicit = I->getSecond();
8798 // Every default map produces a single argument which is a target parameter.
8799 CombinedInfo.Types.back() |=
8800 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8802 // Add flag stating this is an implicit map.
8803 if (IsImplicit)
8804 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8806 // No user-defined mapper for default mapping.
8807 CombinedInfo.Mappers.push_back(nullptr);
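// Default-mapping sketch (illustrative):
// \code
// int n; int *p; struct S s;
// #pragma omp target
// { use(n, p, s); }
// \endcode
// Here 'n' is emitted as LITERAL | TARGET_PARAM | IMPLICIT with its byte
// size, 'p' as a zero-sized TARGET_PARAM | IMPLICIT entry, and the
// by-reference capture of 's' as TO | FROM | TARGET_PARAM | IMPLICIT.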
8810 } // anonymous namespace
8812 // Try to extract the base declaration from a `this->x` expression if possible.
8813 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8814 if (!E)
8815 return nullptr;
8817 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8818 if (const MemberExpr *ME =
8819 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8820 return ME->getMemberDecl();
8821 return nullptr;
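// For example, for the section expression 'this->ptr[0:n]' this returns the
// declaration of 'ptr'; for any other expression shape it returns nullptr.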
8824 /// Emit a string constant containing the names of the values mapped to the
8825 /// offloading runtime library.
8826 llvm::Constant *
8827 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8828 MappableExprsHandler::MappingExprInfo &MapExprs) {
8830 uint32_t SrcLocStrSize;
8831 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8832 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8834 SourceLocation Loc;
8835 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8836 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8837 Loc = VD->getLocation();
8838 else
8839 Loc = MapExprs.getMapExpr()->getExprLoc();
8840 } else {
8841 Loc = MapExprs.getMapDecl()->getLocation();
8844 std::string ExprName;
8845 if (MapExprs.getMapExpr()) {
8846 PrintingPolicy P(CGF.getContext().getLangOpts());
8847 llvm::raw_string_ostream OS(ExprName);
8848 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8849 OS.flush();
8850 } else {
8851 ExprName = MapExprs.getMapDecl()->getNameAsString();
8854 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8855 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8856 PLoc.getLine(), PLoc.getColumn(),
8857 SrcLocStrSize);
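// The emitted constant follows the usual source-location string layout,
// e.g. ";file.c;s.x;12;7;;" (assuming the ";file;name;line;column;;"
// format), which the runtime can use in map-related diagnostics.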
8860 /// Emit the arrays used to pass the captures and map information to the
8861 /// offloading runtime library. If there is no map or capture information,
8862 /// return nullptr by reference.
8863 static void emitOffloadingArrays(
8864 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8865 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8866 bool IsNonContiguous = false) {
8867 CodeGenModule &CGM = CGF.CGM;
8869 // Reset the array information.
8870 Info.clearArrayInfo();
8871 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8873 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8874 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8875 CGF.AllocaInsertPt->getIterator());
8876 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8877 CGF.Builder.GetInsertPoint());
8879 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8880 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8882 if (CGM.getCodeGenOpts().getDebugInfo() !=
8883 llvm::codegenoptions::NoDebugInfo) {
8884 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8885 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8886 FillInfoMap);
8889 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8890 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8891 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8895 auto CustomMapperCB = [&](unsigned int I) {
8896 llvm::Value *MFunc = nullptr;
8897 if (CombinedInfo.Mappers[I]) {
8898 Info.HasMapper = true;
8899 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8900 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8902 return MFunc;
8904 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8905 IsNonContiguous, DeviceAddrCB,
8906 CustomMapperCB);
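// Conceptually this materializes the parallel arrays the offloading runtime
// consumes, e.g. (names illustrative) .offload_baseptrs, .offload_ptrs,
// .offload_sizes, .offload_maptypes and, when debug info is enabled,
// .offload_mapnames.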
8909 /// Check for an inner distribute directive.
8910 static const OMPExecutableDirective *
8911 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8912 const auto *CS = D.getInnermostCapturedStmt();
8913 const auto *Body =
8914 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8915 const Stmt *ChildStmt =
8916 CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8918 if (const auto *NestedDir =
8919 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8920 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8921 switch (D.getDirectiveKind()) {
8922 case OMPD_target:
8923 // For now, just treat 'target teams loop' as if it's distributed.
8924 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8925 return NestedDir;
8926 if (DKind == OMPD_teams) {
8927 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8928 /*IgnoreCaptured=*/true);
8929 if (!Body)
8930 return nullptr;
8931 ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8932 if (const auto *NND =
8933 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8934 DKind = NND->getDirectiveKind();
8935 if (isOpenMPDistributeDirective(DKind))
8936 return NND;
8939 return nullptr;
8940 case OMPD_target_teams:
8941 if (isOpenMPDistributeDirective(DKind))
8942 return NestedDir;
8943 return nullptr;
8944 case OMPD_target_parallel:
8945 case OMPD_target_simd:
8946 case OMPD_target_parallel_for:
8947 case OMPD_target_parallel_for_simd:
8948 return nullptr;
8949 case OMPD_target_teams_distribute:
8950 case OMPD_target_teams_distribute_simd:
8951 case OMPD_target_teams_distribute_parallel_for:
8952 case OMPD_target_teams_distribute_parallel_for_simd:
8953 case OMPD_parallel:
8954 case OMPD_for:
8955 case OMPD_parallel_for:
8956 case OMPD_parallel_master:
8957 case OMPD_parallel_sections:
8958 case OMPD_for_simd:
8959 case OMPD_parallel_for_simd:
8960 case OMPD_cancel:
8961 case OMPD_cancellation_point:
8962 case OMPD_ordered:
8963 case OMPD_threadprivate:
8964 case OMPD_allocate:
8965 case OMPD_task:
8966 case OMPD_simd:
8967 case OMPD_tile:
8968 case OMPD_unroll:
8969 case OMPD_sections:
8970 case OMPD_section:
8971 case OMPD_single:
8972 case OMPD_master:
8973 case OMPD_critical:
8974 case OMPD_taskyield:
8975 case OMPD_barrier:
8976 case OMPD_taskwait:
8977 case OMPD_taskgroup:
8978 case OMPD_atomic:
8979 case OMPD_flush:
8980 case OMPD_depobj:
8981 case OMPD_scan:
8982 case OMPD_teams:
8983 case OMPD_target_data:
8984 case OMPD_target_exit_data:
8985 case OMPD_target_enter_data:
8986 case OMPD_distribute:
8987 case OMPD_distribute_simd:
8988 case OMPD_distribute_parallel_for:
8989 case OMPD_distribute_parallel_for_simd:
8990 case OMPD_teams_distribute:
8991 case OMPD_teams_distribute_simd:
8992 case OMPD_teams_distribute_parallel_for:
8993 case OMPD_teams_distribute_parallel_for_simd:
8994 case OMPD_target_update:
8995 case OMPD_declare_simd:
8996 case OMPD_declare_variant:
8997 case OMPD_begin_declare_variant:
8998 case OMPD_end_declare_variant:
8999 case OMPD_declare_target:
9000 case OMPD_end_declare_target:
9001 case OMPD_declare_reduction:
9002 case OMPD_declare_mapper:
9003 case OMPD_taskloop:
9004 case OMPD_taskloop_simd:
9005 case OMPD_master_taskloop:
9006 case OMPD_master_taskloop_simd:
9007 case OMPD_parallel_master_taskloop:
9008 case OMPD_parallel_master_taskloop_simd:
9009 case OMPD_requires:
9010 case OMPD_metadirective:
9011 case OMPD_unknown:
9012 default:
9013 llvm_unreachable("Unexpected directive.");
9017 return nullptr;
9020 /// Emit the user-defined mapper function. The code generation follows the
9021 /// pattern in the example below.
9022 /// \code
9023 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9024 /// void *base, void *begin,
9025 /// int64_t size, int64_t type,
9026 /// void *name = nullptr) {
9027 /// // Allocate space for an array section first or add a base/begin for
9028 /// // pointer dereference.
9029 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9030 /// !maptype.IsDelete)
9031 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9032 /// size*sizeof(Ty), clearToFromMember(type));
9033 /// // Map members.
9034 /// for (unsigned i = 0; i < size; i++) {
9035 /// // For each component specified by this mapper:
9036 /// for (auto c : begin[i]->all_components) {
9037 /// if (c.hasMapper())
9038 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9039 /// c.arg_type, c.arg_name);
9040 /// else
9041 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9042 /// c.arg_begin, c.arg_size, c.arg_type,
9043 /// c.arg_name);
9044 /// }
9045 /// }
9046 /// // Delete the array section.
9047 /// if (size > 1 && maptype.IsDelete)
9048 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9049 /// size*sizeof(Ty), clearToFromMember(type));
9050 /// }
9051 /// \endcode
9052 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9053 CodeGenFunction *CGF) {
9054 if (UDMMap.count(D) > 0)
9055 return;
9056 ASTContext &C = CGM.getContext();
9057 QualType Ty = D->getType();
9058 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9059 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9060 auto *MapperVarDecl =
9061 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9062 SourceLocation Loc = D->getLocation();
9063 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9064 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9066 // Prepare mapper function arguments and attributes.
9067 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9068 C.VoidPtrTy, ImplicitParamDecl::Other);
9069 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9070 ImplicitParamDecl::Other);
9071 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9072 C.VoidPtrTy, ImplicitParamDecl::Other);
9073 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9074 ImplicitParamDecl::Other);
9075 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9076 ImplicitParamDecl::Other);
9077 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9078 ImplicitParamDecl::Other);
9079 FunctionArgList Args;
9080 Args.push_back(&HandleArg);
9081 Args.push_back(&BaseArg);
9082 Args.push_back(&BeginArg);
9083 Args.push_back(&SizeArg);
9084 Args.push_back(&TypeArg);
9085 Args.push_back(&NameArg);
9086 const CGFunctionInfo &FnInfo =
9087 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9088 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9089 SmallString<64> TyStr;
9090 llvm::raw_svector_ostream Out(TyStr);
9091 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9092 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9093 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9094 Name, &CGM.getModule());
9095 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9096 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9097 // Start the mapper function code generation.
9098 CodeGenFunction MapperCGF(CGM);
9099 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9100 // Compute the starting and end addresses of array elements.
9101 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9102 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9103 C.getPointerType(Int64Ty), Loc);
9104 // Prepare common arguments for array initialization and deletion.
9105 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9106 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9107 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9108 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9109 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9110 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9111 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9112 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9113 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9114 // Convert the size in bytes into the number of array elements.
9115 Size = MapperCGF.Builder.CreateExactUDiv(
9116 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9117 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9118 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9119 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9120 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9121 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9122 C.getPointerType(Int64Ty), Loc);
9123 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9124 MapperCGF.GetAddrOfLocalVar(&NameArg),
9125 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9127 // Emit array initialization if this is an array section and \p MapType
9128 // indicates that memory allocation is required.
9129 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9130 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9131 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9133 // Emit a for loop to iterate through SizeArg elements and map all of them.
9135 // Emit the loop header block.
9136 MapperCGF.EmitBlock(HeadBB);
9137 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9138 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9139 // Evaluate whether the initial condition is satisfied.
9140 llvm::Value *IsEmpty =
9141 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9142 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9143 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9145 // Emit the loop body block.
9146 MapperCGF.EmitBlock(BodyBB);
9147 llvm::BasicBlock *LastBB = BodyBB;
9148 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9149 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9150 PtrPHI->addIncoming(PtrBegin, EntryBB);
9151 Address PtrCurrent(PtrPHI, ElemTy,
9152 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9153 .getAlignment()
9154 .alignmentOfArrayElement(ElementSize));
9155 // Privatize the declared variable of mapper to be the current array element.
9156 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9157 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9158 (void)Scope.Privatize();
9160 // Get map clause information. Fill up the arrays with all mapped variables.
9161 MappableExprsHandler::MapCombinedInfoTy Info;
9162 MappableExprsHandler MEHandler(*D, MapperCGF);
9163 MEHandler.generateAllInfoForMapper(Info);
9165 // Call the runtime API __tgt_mapper_num_components to get the number of
9166 // pre-existing components.
9167 llvm::Value *OffloadingArgs[] = {Handle};
9168 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9169 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9170 OMPRTL___tgt_mapper_num_components),
9171 OffloadingArgs);
9172 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9173 PreviousSize,
9174 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
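// Rebase sketch (illustrative): if the handle already holds N components,
// every MEMBER_OF index emitted below is shifted by N, i.e. the combined map
// type becomes OriginalType + (N << getFlagMemberOffset()).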
9176 // Fill up the runtime mapper handle for all components.
9177 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9178 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9179 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9180 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9181 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9182 llvm::Value *CurSizeArg = Info.Sizes[I];
9183 llvm::Value *CurNameArg =
9184 (CGM.getCodeGenOpts().getDebugInfo() ==
9185 llvm::codegenoptions::NoDebugInfo)
9186 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9187 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9189 // Extract the MEMBER_OF field from the map type.
9190 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9191 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9192 Info.Types[I]));
9193 llvm::Value *MemberMapType =
9194 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9196 // Combine the map type inherited from user-defined mapper with that
9197 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9198 // bits of the \a MapType, which is the input argument of the mapper
9199 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9200 // bits of MemberMapType.
9201 // [OpenMP 5.0], 1.2.6. map-type decay.
9202 // | alloc | to | from | tofrom | release | delete
9203 // ----------------------------------------------------------
9204 // alloc | alloc | alloc | alloc | alloc | release | delete
9205 // to | alloc | to | alloc | to | release | delete
9206 // from | alloc | alloc | from | from | release | delete
9207 // tofrom | alloc | to | from | tofrom | release | delete
9208 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9209 MapType,
9210 MapperCGF.Builder.getInt64(
9211 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9212 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9213 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9214 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9215 llvm::BasicBlock *AllocElseBB =
9216 MapperCGF.createBasicBlock("omp.type.alloc.else");
9217 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9218 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9219 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9220 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9221 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9222 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9223 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9224 MapperCGF.EmitBlock(AllocBB);
9225 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9226 MemberMapType,
9227 MapperCGF.Builder.getInt64(
9228 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9229 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9230 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9231 MapperCGF.Builder.CreateBr(EndBB);
9232 MapperCGF.EmitBlock(AllocElseBB);
9233 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9234 LeftToFrom,
9235 MapperCGF.Builder.getInt64(
9236 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9237 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9238 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9239 // In case of to, clear OMP_MAP_FROM.
9240 MapperCGF.EmitBlock(ToBB);
9241 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9242 MemberMapType,
9243 MapperCGF.Builder.getInt64(
9244 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9245 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9246 MapperCGF.Builder.CreateBr(EndBB);
9247 MapperCGF.EmitBlock(ToElseBB);
9248 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9249 LeftToFrom,
9250 MapperCGF.Builder.getInt64(
9251 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9252 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9253 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9254 // In case of from, clear OMP_MAP_TO.
9255 MapperCGF.EmitBlock(FromBB);
9256 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9257 MemberMapType,
9258 MapperCGF.Builder.getInt64(
9259 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9260 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9261 // In case of tofrom, do nothing.
9262 MapperCGF.EmitBlock(EndBB);
9263 LastBB = EndBB;
9264 llvm::PHINode *CurMapType =
9265 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9266 CurMapType->addIncoming(AllocMapType, AllocBB);
9267 CurMapType->addIncoming(ToMapType, ToBB);
9268 CurMapType->addIncoming(FromMapType, FromBB);
9269 CurMapType->addIncoming(MemberMapType, ToElseBB);
9271 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9272 CurSizeArg, CurMapType, CurNameArg};
9273 if (Info.Mappers[I]) {
9274 // Call the corresponding mapper function.
9275 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9276 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9277 assert(MapperFunc && "Expect a valid mapper function to be available.");
9278 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9279 } else {
9280 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9281 // data structure.
9282 MapperCGF.EmitRuntimeCall(
9283 OMPBuilder.getOrCreateRuntimeFunction(
9284 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9285 OffloadingArgs);
9289 // Update the pointer to point to the next element that needs to be mapped,
9290 // and check whether we have mapped all elements.
9291 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9292 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9293 PtrPHI->addIncoming(PtrNext, LastBB);
9294 llvm::Value *IsDone =
9295 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9296 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9297 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9299 MapperCGF.EmitBlock(ExitBB);
9300 // Emit array deletion if this is an array section and \p MapType indicates
9301 // that deletion is required.
9302 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9303 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9305 // Emit the function exit block.
9306 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9307 MapperCGF.FinishFunction();
9308 UDMMap.try_emplace(D, Fn);
9309 if (CGF) {
9310 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9311 Decls.second.push_back(D);
9315 /// Emit the array initialization or deletion portion for user-defined mapper
9316 /// code generation. First, it evaluates whether an array section is mapped and
9317 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9318 /// true, and \a MapType indicates to not delete this array, array
9319 /// initialization code is generated. If \a IsInit is false, and \a MapType
9320 indicates to delete this array, array deletion code is generated.
9321 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9322 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9323 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9324 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9325 bool IsInit) {
9326 StringRef Prefix = IsInit ? ".init" : ".del";
9328 // Evaluate if this is an array section.
9329 llvm::BasicBlock *BodyBB =
9330 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9331 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9332 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9333 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9334 MapType,
9335 MapperCGF.Builder.getInt64(
9336 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9337 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9338 llvm::Value *DeleteCond;
9339 llvm::Value *Cond;
9340 if (IsInit) {
9341 // base != begin?
9342 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9343 // IsPtrAndObj?
9344 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9345 MapType,
9346 MapperCGF.Builder.getInt64(
9347 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9348 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9349 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9350 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9351 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9352 DeleteCond = MapperCGF.Builder.CreateIsNull(
9353 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9354 } else {
9355 Cond = IsArray;
9356 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9357 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9359 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9360 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9362 MapperCGF.EmitBlock(BodyBB);
9363 // Get the array size by multiplying element size and element number (i.e., \p
9364 // Size).
9365 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9366 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9367 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it is used
9368 // for memory allocation/deletion purposes only.
9369 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9370 MapType,
9371 MapperCGF.Builder.getInt64(
9372 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9373 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9374 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9375 MapTypeArg = MapperCGF.Builder.CreateOr(
9376 MapTypeArg,
9377 MapperCGF.Builder.getInt64(
9378 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9379 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9381 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9382 // data structure.
9383 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9384 ArraySize, MapTypeArg, MapName};
9385 MapperCGF.EmitRuntimeCall(
9386 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9387 OMPRTL___tgt_push_mapper_component),
9388 OffloadingArgs);
9391 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9392 const OMPDeclareMapperDecl *D) {
9393 auto I = UDMMap.find(D);
9394 if (I != UDMMap.end())
9395 return I->second;
9396 emitUserDefinedMapper(D);
9397 return UDMMap.lookup(D);
9400 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9401 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9402 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9403 const OMPLoopDirective &D)>
9404 SizeEmitter) {
9405 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9406 const OMPExecutableDirective *TD = &D;
9407 // Get nested teams distribute kind directive, if any.
9408 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9409 Kind != OMPD_target_teams_loop)
9410 TD = getNestedDistributeDirective(CGM.getContext(), D);
9411 if (!TD)
9412 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9414 const auto *LD = cast<OMPLoopDirective>(TD);
9415 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9416 return NumIterations;
9417 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
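// A zero return value tells the runtime that the trip count is unknown, in
// which case it cannot size the launch grid from the loop iteration count.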
9420 static void
9421 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9422 const OMPExecutableDirective &D,
9423 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9424 bool RequiresOuterTask, const CapturedStmt &CS,
9425 bool OffloadingMandatory, CodeGenFunction &CGF) {
9426 if (OffloadingMandatory) {
9427 CGF.Builder.CreateUnreachable();
9428 } else {
9429 if (RequiresOuterTask) {
9430 CapturedVars.clear();
9431 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9433 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9434 CapturedVars);
9438 static llvm::Value *emitDeviceID(
9439 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9440 CodeGenFunction &CGF) {
9441 // Emit device ID if any.
9442 llvm::Value *DeviceID;
9443 if (Device.getPointer()) {
9444 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9445 Device.getInt() == OMPC_DEVICE_device_num) &&
9446 "Expected device_num modifier.");
9447 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9448 DeviceID =
9449 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9450 } else {
9451 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9453 return DeviceID;
9456 llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9457 CodeGenFunction &CGF) {
9458 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9460 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9461 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9462 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9463 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9464 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9465 /*isSigned=*/false);
9467 return DynCGroupMem;
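// This consumes, e.g., '#pragma omp target ompx_dyn_cgroup_mem(1024)'
// (illustrative usage); when the clause is absent it defaults to 0 bytes of
// dynamic group memory.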
9470 static void emitTargetCallKernelLaunch(
9471 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9472 const OMPExecutableDirective &D,
9473 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9474 const CapturedStmt &CS, bool OffloadingMandatory,
9475 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9476 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9477 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9478 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9479 const OMPLoopDirective &D)>
9480 SizeEmitter,
9481 CodeGenFunction &CGF, CodeGenModule &CGM) {
9482 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9484 // Fill up the arrays with all the captured variables.
9485 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9487 // Get mappable expression information.
9488 MappableExprsHandler MEHandler(D, CGF);
9489 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9490 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9492 auto RI = CS.getCapturedRecordDecl()->field_begin();
9493 auto *CV = CapturedVars.begin();
9494 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9495 CE = CS.capture_end();
9496 CI != CE; ++CI, ++RI, ++CV) {
9497 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9498 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9500 // VLA sizes are passed to the outlined region by copy and do not have map
9501 // information associated.
9502 if (CI->capturesVariableArrayType()) {
9503 CurInfo.Exprs.push_back(nullptr);
9504 CurInfo.BasePointers.push_back(*CV);
9505 CurInfo.DevicePtrDecls.push_back(nullptr);
9506 CurInfo.DevicePointers.push_back(
9507 MappableExprsHandler::DeviceInfoTy::None);
9508 CurInfo.Pointers.push_back(*CV);
9509 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9510 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9511 // Copy to the device as an argument. No need to retrieve it.
9512 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9513 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9514 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9515 CurInfo.Mappers.push_back(nullptr);
9516 } else {
9517 // If we have any information in the map clause, we use it, otherwise we
9518 // just do a default mapping.
9519 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9520 if (!CI->capturesThis())
9521 MappedVarSet.insert(CI->getCapturedVar());
9522 else
9523 MappedVarSet.insert(nullptr);
9524 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9525 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9526 // Generate correct mapping for variables captured by reference in
9527 // lambdas.
9528 if (CI->capturesVariable())
9529 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9530 CurInfo, LambdaPointers);
9532 // We expect to have at least an element of information for this capture.
9533 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9534 "Non-existing map pointer for capture!");
9535 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9536 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9537 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9538 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9539 "Inconsistent map information sizes!");
9541 // If there is an entry in PartialStruct it means we have a struct with
9542 // individual members mapped. Emit an extra combined entry.
9543 if (PartialStruct.Base.isValid()) {
9544 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9545 MEHandler.emitCombinedEntry(
9546 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9547 nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty());
9550 // We need to append the results of this capture to what we already have.
9551 CombinedInfo.append(CurInfo);
9553 // Adjust MEMBER_OF flags for the lambda captures.
9554 MEHandler.adjustMemberOfForLambdaCaptures(
9555 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
9556 CombinedInfo.Types);
9557 // Map any list items in a map clause that were not captured because they
9558 // weren't referenced within the construct.
9559 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
9561 CGOpenMPRuntime::TargetDataInfo Info;
9562 // Fill up the arrays and create the arguments.
9563 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9564 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9565 llvm::codegenoptions::NoDebugInfo;
9566 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9567 EmitDebug,
9568 /*ForEndCall=*/false);
9570 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9571 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9572 CGF.VoidPtrTy, CGM.getPointerAlign());
9573 InputInfo.PointersArray =
9574 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9575 InputInfo.SizesArray =
9576 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9577 InputInfo.MappersArray =
9578 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9579 MapTypesArray = Info.RTArgs.MapTypesArray;
9580 MapNamesArray = Info.RTArgs.MapNamesArray;
9582 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9583 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9584 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9585 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9586 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9588 if (IsReverseOffloading) {
9589 // Reverse offloading is not supported, so just execute on the host.
9590 // FIXME: This fallback solution is incorrect since it ignores the
9591 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9592 // assert here and ensure SEMA emits an error.
9593 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9594 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9595 return;
9598 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9599 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9601 llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
9602 llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
9603 llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
9604 llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();
9606 auto &&EmitTargetCallFallbackCB =
9607 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9608 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9609 -> llvm::OpenMPIRBuilder::InsertPointTy {
9610 CGF.Builder.restoreIP(IP);
9611 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9612 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9613 return CGF.Builder.saveIP();
9616 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9617 llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9618 llvm::Value *NumThreads =
9619 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9620 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9621 llvm::Value *NumIterations =
9622 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9623 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9624 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9625 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9627 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9628 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9629 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9631 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9632 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9633 DynCGGroupMem, HasNoWait);
9635 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9636 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9637 DeviceID, RTLoc, AllocaIP));
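// Under the current offloading ABI (an assumption here), emitKernelLaunch
// lowers this to a runtime call along the lines of
// __tgt_target_kernel(loc, device_id, num_teams, num_threads,
// outlined_fn_id, &kernel_args), branching to the fallback callback when the
// launch fails or offloading is unavailable.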
9640 if (RequiresOuterTask)
9641 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9642 else
9643 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9646 static void
9647 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9648 const OMPExecutableDirective &D,
9649 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9650 bool RequiresOuterTask, const CapturedStmt &CS,
9651 bool OffloadingMandatory, CodeGenFunction &CGF) {
9653 // Notify that the host version must be executed.
9654 auto &&ElseGen =
9655 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9656 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9657 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9658 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9661 if (RequiresOuterTask) {
9662 CodeGenFunction::OMPTargetDataInfo InputInfo;
9663 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9664 } else {
9665 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9669 void CGOpenMPRuntime::emitTargetCall(
9670 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9671 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9672 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9673 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9674 const OMPLoopDirective &D)>
9675 SizeEmitter) {
9676 if (!CGF.HaveInsertPoint())
9677 return;
9679 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9680 CGM.getLangOpts().OpenMPOffloadMandatory;
9682 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9684 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9685 D.hasClausesOfKind<OMPNowaitClause>() ||
9686 D.hasClausesOfKind<OMPInReductionClause>();
9687 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9688 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9689 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9690 PrePostActionTy &) {
9691 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9693 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9695 CodeGenFunction::OMPTargetDataInfo InputInfo;
9696 llvm::Value *MapTypesArray = nullptr;
9697 llvm::Value *MapNamesArray = nullptr;
9699 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9700 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9701 OutlinedFnID, &InputInfo, &MapTypesArray,
9702 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9703 PrePostActionTy &) {
9704 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9705 RequiresOuterTask, CS, OffloadingMandatory,
9706 Device, OutlinedFnID, InputInfo, MapTypesArray,
9707 MapNamesArray, SizeEmitter, CGF, CGM);
9710 auto &&TargetElseGen =
9711 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9712 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9713 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9714 CS, OffloadingMandatory, CGF);
9717 // If we have a target function ID, it means that we need to support
9718 // offloading; otherwise, just execute on the host. We need to execute on the
9719 // host regardless of the conditional in the if clause if, e.g., the user does
9720 // not specify target triples.
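// Illustrative example (not from this file): given
//   #pragma omp target if(use_gpu) map(tofrom: a[0:n])
// the then-branch emits a device kernel launch (roughly, a __tgt_target_kernel
// call) whose nonzero return falls back to the host version, while the
// else-branch, or a null OutlinedFnID (e.g. no -fopenmp-targets triple), runs
// the host version directly.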
9721 if (OutlinedFnID) {
9722 if (IfCond) {
9723 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9724 } else {
9725 RegionCodeGenTy ThenRCG(TargetThenGen);
9726 ThenRCG(CGF);
9728 } else {
9729 RegionCodeGenTy ElseRCG(TargetElseGen);
9730 ElseRCG(CGF);
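// Illustrative example (an assumption, not from this file): scanning
//   void foo() {
//     #pragma omp target
//     compute();
//   }
// with ParentName "_Z3foov" visits the OMPTargetDirective below and emits the
// corresponding device kernel entry point.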
9734 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9735 StringRef ParentName) {
9736 if (!S)
9737 return;
9739 // Codegen OMP target directives that offload compute to the device.
9740 bool RequiresDeviceCodegen =
9741 isa<OMPExecutableDirective>(S) &&
9742 isOpenMPTargetExecutionDirective(
9743 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9745 if (RequiresDeviceCodegen) {
9746 const auto &E = *cast<OMPExecutableDirective>(S);
9748 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9749 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9751 // Is this a target region that should not be emitted as an entry point? If
9752 // so, just signal that we are done with this target region.
9753 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9754 return;
9756 switch (E.getDirectiveKind()) {
9757 case OMPD_target:
9758 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9759 cast<OMPTargetDirective>(E));
9760 break;
9761 case OMPD_target_parallel:
9762 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9763 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9764 break;
9765 case OMPD_target_teams:
9766 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9767 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9768 break;
9769 case OMPD_target_teams_distribute:
9770 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9771 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9772 break;
9773 case OMPD_target_teams_distribute_simd:
9774 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9775 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9776 break;
9777 case OMPD_target_parallel_for:
9778 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9779 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9780 break;
9781 case OMPD_target_parallel_for_simd:
9782 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9783 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9784 break;
9785 case OMPD_target_simd:
9786 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9787 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9788 break;
9789 case OMPD_target_teams_distribute_parallel_for:
9790 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9791 CGM, ParentName,
9792 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9793 break;
9794 case OMPD_target_teams_distribute_parallel_for_simd:
9795 CodeGenFunction::
9796 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9797 CGM, ParentName,
9798 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9799 break;
9800 case OMPD_target_teams_loop:
9801 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9802 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9803 break;
9804 case OMPD_target_parallel_loop:
9805 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9806 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9807 break;
9808 case OMPD_parallel:
9809 case OMPD_for:
9810 case OMPD_parallel_for:
9811 case OMPD_parallel_master:
9812 case OMPD_parallel_sections:
9813 case OMPD_for_simd:
9814 case OMPD_parallel_for_simd:
9815 case OMPD_cancel:
9816 case OMPD_cancellation_point:
9817 case OMPD_ordered:
9818 case OMPD_threadprivate:
9819 case OMPD_allocate:
9820 case OMPD_task:
9821 case OMPD_simd:
9822 case OMPD_tile:
9823 case OMPD_unroll:
9824 case OMPD_sections:
9825 case OMPD_section:
9826 case OMPD_single:
9827 case OMPD_master:
9828 case OMPD_critical:
9829 case OMPD_taskyield:
9830 case OMPD_barrier:
9831 case OMPD_taskwait:
9832 case OMPD_taskgroup:
9833 case OMPD_atomic:
9834 case OMPD_flush:
9835 case OMPD_depobj:
9836 case OMPD_scan:
9837 case OMPD_teams:
9838 case OMPD_target_data:
9839 case OMPD_target_exit_data:
9840 case OMPD_target_enter_data:
9841 case OMPD_distribute:
9842 case OMPD_distribute_simd:
9843 case OMPD_distribute_parallel_for:
9844 case OMPD_distribute_parallel_for_simd:
9845 case OMPD_teams_distribute:
9846 case OMPD_teams_distribute_simd:
9847 case OMPD_teams_distribute_parallel_for:
9848 case OMPD_teams_distribute_parallel_for_simd:
9849 case OMPD_target_update:
9850 case OMPD_declare_simd:
9851 case OMPD_declare_variant:
9852 case OMPD_begin_declare_variant:
9853 case OMPD_end_declare_variant:
9854 case OMPD_declare_target:
9855 case OMPD_end_declare_target:
9856 case OMPD_declare_reduction:
9857 case OMPD_declare_mapper:
9858 case OMPD_taskloop:
9859 case OMPD_taskloop_simd:
9860 case OMPD_master_taskloop:
9861 case OMPD_master_taskloop_simd:
9862 case OMPD_parallel_master_taskloop:
9863 case OMPD_parallel_master_taskloop_simd:
9864 case OMPD_requires:
9865 case OMPD_metadirective:
9866 case OMPD_unknown:
9867 default:
9868 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9870 return;
9873 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9874 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9875 return;
9877 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9878 return;
9881 // If this is a lambda function, look into its body.
9882 if (const auto *L = dyn_cast<LambdaExpr>(S))
9883 S = L->getBody();
9885 // Keep looking for target regions recursively.
9886 for (const Stmt *II : S->children())
9887 scanForTargetRegionsFunctions(II, ParentName);
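// Illustrative usage (not from this file) of the device_type checks below:
//   #pragma omp declare target device_type(nohost)
//   void gpu_only();
//   #pragma omp end declare target
// gpu_only() is then skipped when emitting host code, and a device_type(host)
// function is skipped when emitting device code.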
9890 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9891 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9892 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9893 if (!DevTy)
9894 return false;
9895 // Do not emit device_type(nohost) functions for the host.
9896 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9897 return true;
9898 // Do not emit device_type(host) functions for the device.
9899 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9900 return true;
9901 return false;
9904 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9905 // If emitting code for the host, we do not process FD here. Instead we do
9906 // the normal code generation.
9907 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9908 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9909 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9910 CGM.getLangOpts().OpenMPIsTargetDevice))
9911 return true;
9912 return false;
9915 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9916 // Try to detect target regions in the function.
9917 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9918 StringRef Name = CGM.getMangledName(GD);
9919 scanForTargetRegionsFunctions(FD->getBody(), Name);
9920 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9921 CGM.getLangOpts().OpenMPIsTargetDevice))
9922 return true;
9925 // Do not emit the function if it is not marked as declare target.
9926 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9927 AlreadyEmittedTargetDecls.count(VD) == 0;
9930 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9931 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9932 CGM.getLangOpts().OpenMPIsTargetDevice))
9933 return true;
9935 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9936 return false;
9938 // Check if there are Ctors/Dtors in this declaration and look for target
9939 // regions in it. We use the complete variant to produce the kernel name
9940 // mangling.
9941 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9942 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9943 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9944 StringRef ParentName =
9945 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9946 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9948 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9949 StringRef ParentName =
9950 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9951 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9955 // Do not emit the variable if it is not marked as declare target.
9956 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9957 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9958 cast<VarDecl>(GD.getDecl()));
9959 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9960 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9961 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9962 HasRequiresUnifiedSharedMemory)) {
9963 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9964 return true;
9966 return false;
9969 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9970 llvm::Constant *Addr) {
9971 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9972 !CGM.getLangOpts().OpenMPIsTargetDevice)
9973 return;
9975 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9976 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9978 // If this is an 'extern' declaration we defer to the canonical definition and
9979 // do not emit an offloading entry.
9980 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9981 VD->hasExternalStorage())
9982 return;
9984 if (!Res) {
9985 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9986 // Register non-target variables being emitted in device code (debug info
9987 // may cause this).
9988 StringRef VarName = CGM.getMangledName(VD);
9989 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9991 return;
9994 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9995 auto LinkageForVariable = [&VD, this]() {
9996 return CGM.getLLVMLinkageVarDefinition(VD);
9999 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10000 OMPBuilder.registerTargetGlobalVariable(
10001 convertCaptureClause(VD), convertDeviceClause(VD),
10002 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10003 VD->isExternallyVisible(),
10004 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10005 VD->getCanonicalDecl()->getBeginLoc()),
10006 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10007 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10008 CGM.getTypes().ConvertTypeForMem(
10009 CGM.getContext().getPointerType(VD->getType())),
10010 Addr);
10012 for (auto *ref : GeneratedRefs)
10013 CGM.addCompilerUsedGlobal(ref);
10015 return;
10018 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10019 if (isa<FunctionDecl>(GD.getDecl()) ||
10020 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10021 return emitTargetFunctions(GD);
10023 return emitTargetGlobalVariable(GD);
10026 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10027 for (const VarDecl *VD : DeferredGlobalVariables) {
10028 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10029 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10030 if (!Res)
10031 continue;
10032 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10033 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10034 !HasRequiresUnifiedSharedMemory) {
10035 CGM.EmitGlobal(VD);
10036 } else {
10037 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10038 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10039 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10040 HasRequiresUnifiedSharedMemory)) &&
10041 "Expected link clause or to clause with unified memory.");
10042 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10047 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10048 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10049 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10050 " Expected target-based directive.");
10053 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10054 for (const OMPClause *Clause : D->clauselists()) {
10055 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10056 HasRequiresUnifiedSharedMemory = true;
10057 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10058 } else if (const auto *AC =
10059 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10060 switch (AC->getAtomicDefaultMemOrderKind()) {
10061 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10062 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10063 break;
10064 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10065 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10066 break;
10067 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10068 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10069 break;
10070 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10071 break;
10077 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10078 return RequiresAtomicOrdering;
10081 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10082 LangAS &AS) {
10083 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10084 return false;
10085 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10086 switch (A->getAllocatorType()) {
10087 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10088 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10089 // Not supported; fall back to the default memory space.
10090 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10091 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10092 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10093 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10094 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10095 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10096 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10097 AS = LangAS::Default;
10098 return true;
10099 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10100 llvm_unreachable("Expected predefined allocator for the variables with the "
10101 "static storage.");
10103 return false;
10106 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10107 return HasRequiresUnifiedSharedMemory;
10110 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10111 CodeGenModule &CGM)
10112 : CGM(CGM) {
10113 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10114 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10115 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10119 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10120 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10121 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10124 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10125 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10126 return true;
10128 const auto *D = cast<FunctionDecl>(GD.getDecl());
10129 // Do not emit the function if it is marked as declare target, as it was
10130 // already emitted.
10131 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10132 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10133 if (auto *F = dyn_cast_or_null<llvm::Function>(
10134 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10135 return !F->isDeclaration();
10136 return false;
10138 return true;
10141 return !AlreadyEmittedTargetDecls.insert(D).second;
10144 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10145 // If we don't have entries or if we are emitting code for the device, we
10146 // don't need to do anything.
10147 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10148 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
10149 (OMPBuilder.OffloadInfoManager.empty() &&
10150 !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
10151 return nullptr;
10153 // Create and register the function that handles the requires directives.
10154 ASTContext &C = CGM.getContext();
10156 llvm::Function *RequiresRegFn;
10158 CodeGenFunction CGF(CGM);
10159 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10160 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10161 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10162 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10163 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10164 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10165 // TODO: check for other requires clauses.
10166 // The requires directive takes effect only when a target region is
10167 // present in the compilation unit. Otherwise it is ignored and not
10168 // passed to the runtime. This prevents the runtime from raising an error
10169 // for mismatched requires clauses across compilation units that don't
10170 // contain at least one target region.
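// For example (illustrative, not from the source): a translation unit with
//   #pragma omp requires unified_shared_memory
// plus at least one target region gets a registration constructor that calls
//   __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY);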
10171 assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
10172 !OMPBuilder.OffloadInfoManager.empty()) &&
10173 "Target or declare target region expected.");
10174 if (HasRequiresUnifiedSharedMemory)
10175 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10176 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10177 CGM.getModule(), OMPRTL___tgt_register_requires),
10178 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10179 CGF.FinishFunction();
10181 return RequiresRegFn;
10184 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10185 const OMPExecutableDirective &D,
10186 SourceLocation Loc,
10187 llvm::Function *OutlinedFn,
10188 ArrayRef<llvm::Value *> CapturedVars) {
10189 if (!CGF.HaveInsertPoint())
10190 return;
10192 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10193 CodeGenFunction::RunCleanupsScope Scope(CGF);
10195 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10196 llvm::Value *Args[] = {
10197 RTLoc,
10198 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10199 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10200 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10201 RealArgs.append(std::begin(Args), std::end(Args));
10202 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10204 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10205 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10206 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10209 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10210 const Expr *NumTeams,
10211 const Expr *ThreadLimit,
10212 SourceLocation Loc) {
10213 if (!CGF.HaveInsertPoint())
10214 return;
10216 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10218 llvm::Value *NumTeamsVal =
10219 NumTeams
10220 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10221 CGF.CGM.Int32Ty, /* isSigned = */ true)
10222 : CGF.Builder.getInt32(0);
10224 llvm::Value *ThreadLimitVal =
10225 ThreadLimit
10226 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10227 CGF.CGM.Int32Ty, /* isSigned = */ true)
10228 : CGF.Builder.getInt32(0);
10230 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10231 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10232 ThreadLimitVal};
10233 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10234 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10235 PushNumTeamsArgs);
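// Illustrative lowering (not from this file):
//   #pragma omp teams num_teams(8) thread_limit(64)
// emits __kmpc_push_num_teams(&loc, gtid, 8, 64), followed at the teams
// construct by the __kmpc_fork_teams call from emitTeamsCall above.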
10238 void CGOpenMPRuntime::emitTargetDataCalls(
10239 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10240 const Expr *Device, const RegionCodeGenTy &CodeGen,
10241 CGOpenMPRuntime::TargetDataInfo &Info) {
10242 if (!CGF.HaveInsertPoint())
10243 return;
10245 // Action used to replace the default codegen action and turn privatization
10246 // off.
10247 PrePostActionTy NoPrivAction;
10249 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10250 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10251 CGF.AllocaInsertPt->getIterator());
10252 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10253 CGF.Builder.GetInsertPoint());
10254 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10256 llvm::Value *IfCondVal = nullptr;
10257 if (IfCond)
10258 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10260 // Emit device ID if any.
10261 llvm::Value *DeviceID = nullptr;
10262 if (Device) {
10263 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10264 CGF.Int64Ty, /*isSigned=*/true);
10265 } else {
10266 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10269 // Fill up the arrays with all the mapped variables.
10270 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10271 auto GenMapInfoCB =
10272 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10273 CGF.Builder.restoreIP(CodeGenIP);
10274 // Get map clause information.
10275 MappableExprsHandler MEHandler(D, CGF);
10276 MEHandler.generateAllInfo(CombinedInfo);
10278 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10279 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10281 if (CGM.getCodeGenOpts().getDebugInfo() !=
10282 llvm::codegenoptions::NoDebugInfo) {
10283 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10284 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10285 FillInfoMap);
10288 return CombinedInfo;
10290 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10291 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10292 CGF.Builder.restoreIP(CodeGenIP);
10293 switch (BodyGenType) {
10294 case BodyGenTy::Priv:
10295 if (!Info.CaptureDeviceAddrMap.empty())
10296 CodeGen(CGF);
10297 break;
10298 case BodyGenTy::DupNoPriv:
10299 if (!Info.CaptureDeviceAddrMap.empty()) {
10300 CodeGen.setAction(NoPrivAction);
10301 CodeGen(CGF);
10303 break;
10304 case BodyGenTy::NoPriv:
10305 if (Info.CaptureDeviceAddrMap.empty()) {
10306 CodeGen.setAction(NoPrivAction);
10307 CodeGen(CGF);
10309 break;
10311 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10312 CGF.Builder.GetInsertPoint());
10315 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10316 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10317 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10321 auto CustomMapperCB = [&](unsigned int I) {
10322 llvm::Value *MFunc = nullptr;
10323 if (CombinedInfo.Mappers[I]) {
10324 Info.HasMapper = true;
10325 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10326 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10328 return MFunc;
10331 // Source location for the ident struct
10332 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10334 CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10335 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10336 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
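// Illustrative source (not from this file) handled by the call above:
//   #pragma omp target data map(tofrom: a[0:n]) if(n > 0) device(dev)
// createTargetData brackets the region body produced by BodyCB with the
// corresponding __tgt_target_data_begin/end mapper calls.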
10339 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10340 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10341 const Expr *Device) {
10342 if (!CGF.HaveInsertPoint())
10343 return;
10345 assert((isa<OMPTargetEnterDataDirective>(D) ||
10346 isa<OMPTargetExitDataDirective>(D) ||
10347 isa<OMPTargetUpdateDirective>(D)) &&
10348 "Expecting either target enter, exit data, or update directives.");
10350 CodeGenFunction::OMPTargetDataInfo InputInfo;
10351 llvm::Value *MapTypesArray = nullptr;
10352 llvm::Value *MapNamesArray = nullptr;
10353 // Generate the code for the opening of the data environment.
10354 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10355 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10356 // Emit device ID if any.
10357 llvm::Value *DeviceID = nullptr;
10358 if (Device) {
10359 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10360 CGF.Int64Ty, /*isSigned=*/true);
10361 } else {
10362 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10365 // Emit the number of elements in the offloading arrays.
10366 llvm::Constant *PointerNum =
10367 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10369 // Source location for the ident struct
10370 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10372 llvm::Value *OffloadingArgs[] = {RTLoc,
10373 DeviceID,
10374 PointerNum,
10375 InputInfo.BasePointersArray.getPointer(),
10376 InputInfo.PointersArray.getPointer(),
10377 InputInfo.SizesArray.getPointer(),
10378 MapTypesArray,
10379 MapNamesArray,
10380 InputInfo.MappersArray.getPointer()};
10382 // Select the right runtime function call for each standalone
10383 // directive.
10384 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10385 RuntimeFunction RTLFn;
10386 switch (D.getDirectiveKind()) {
10387 case OMPD_target_enter_data:
10388 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10389 : OMPRTL___tgt_target_data_begin_mapper;
10390 break;
10391 case OMPD_target_exit_data:
10392 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10393 : OMPRTL___tgt_target_data_end_mapper;
10394 break;
10395 case OMPD_target_update:
10396 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10397 : OMPRTL___tgt_target_data_update_mapper;
10398 break;
10399 case OMPD_parallel:
10400 case OMPD_for:
10401 case OMPD_parallel_for:
10402 case OMPD_parallel_master:
10403 case OMPD_parallel_sections:
10404 case OMPD_for_simd:
10405 case OMPD_parallel_for_simd:
10406 case OMPD_cancel:
10407 case OMPD_cancellation_point:
10408 case OMPD_ordered:
10409 case OMPD_threadprivate:
10410 case OMPD_allocate:
10411 case OMPD_task:
10412 case OMPD_simd:
10413 case OMPD_tile:
10414 case OMPD_unroll:
10415 case OMPD_sections:
10416 case OMPD_section:
10417 case OMPD_single:
10418 case OMPD_master:
10419 case OMPD_critical:
10420 case OMPD_taskyield:
10421 case OMPD_barrier:
10422 case OMPD_taskwait:
10423 case OMPD_taskgroup:
10424 case OMPD_atomic:
10425 case OMPD_flush:
10426 case OMPD_depobj:
10427 case OMPD_scan:
10428 case OMPD_teams:
10429 case OMPD_target_data:
10430 case OMPD_distribute:
10431 case OMPD_distribute_simd:
10432 case OMPD_distribute_parallel_for:
10433 case OMPD_distribute_parallel_for_simd:
10434 case OMPD_teams_distribute:
10435 case OMPD_teams_distribute_simd:
10436 case OMPD_teams_distribute_parallel_for:
10437 case OMPD_teams_distribute_parallel_for_simd:
10438 case OMPD_declare_simd:
10439 case OMPD_declare_variant:
10440 case OMPD_begin_declare_variant:
10441 case OMPD_end_declare_variant:
10442 case OMPD_declare_target:
10443 case OMPD_end_declare_target:
10444 case OMPD_declare_reduction:
10445 case OMPD_declare_mapper:
10446 case OMPD_taskloop:
10447 case OMPD_taskloop_simd:
10448 case OMPD_master_taskloop:
10449 case OMPD_master_taskloop_simd:
10450 case OMPD_parallel_master_taskloop:
10451 case OMPD_parallel_master_taskloop_simd:
10452 case OMPD_target:
10453 case OMPD_target_simd:
10454 case OMPD_target_teams_distribute:
10455 case OMPD_target_teams_distribute_simd:
10456 case OMPD_target_teams_distribute_parallel_for:
10457 case OMPD_target_teams_distribute_parallel_for_simd:
10458 case OMPD_target_teams:
10459 case OMPD_target_parallel:
10460 case OMPD_target_parallel_for:
10461 case OMPD_target_parallel_for_simd:
10462 case OMPD_requires:
10463 case OMPD_metadirective:
10464 case OMPD_unknown:
10465 default:
10466 llvm_unreachable("Unexpected standalone target data directive.");
10467 break;
10469 CGF.EmitRuntimeCall(
10470 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10471 OffloadingArgs);
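// Summarizing the dispatch above (illustrative):
//   omp target enter data [nowait] -> __tgt_target_data_begin[_nowait]_mapper
//   omp target exit data  [nowait] -> __tgt_target_data_end[_nowait]_mapper
//   omp target update     [nowait] -> __tgt_target_data_update[_nowait]_mapper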
10474 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10475 &MapNamesArray](CodeGenFunction &CGF,
10476 PrePostActionTy &) {
10477 // Fill up the arrays with all the mapped variables.
10478 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10480 // Get map clause information.
10481 MappableExprsHandler MEHandler(D, CGF);
10482 MEHandler.generateAllInfo(CombinedInfo);
10484 CGOpenMPRuntime::TargetDataInfo Info;
10485 // Fill up the arrays and create the arguments.
10486 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10487 /*IsNonContiguous=*/true);
10488 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10489 D.hasClausesOfKind<OMPNowaitClause>();
10490 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10491 llvm::codegenoptions::NoDebugInfo;
10492 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10493 EmitDebug,
10494 /*ForEndCall=*/false);
10495 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10496 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10497 CGF.VoidPtrTy, CGM.getPointerAlign());
10498 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10499 CGM.getPointerAlign());
10500 InputInfo.SizesArray =
10501 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10502 InputInfo.MappersArray =
10503 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10504 MapTypesArray = Info.RTArgs.MapTypesArray;
10505 MapNamesArray = Info.RTArgs.MapNamesArray;
10506 if (RequiresOuterTask)
10507 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10508 else
10509 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10512 if (IfCond) {
10513 emitIfClause(CGF, IfCond, TargetThenGen,
10514 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10515 } else {
10516 RegionCodeGenTy ThenRCG(TargetThenGen);
10517 ThenRCG(CGF);
10521 namespace {
10522 /// Kind of parameter in a function with 'declare simd' directive.
10523 enum ParamKindTy {
10524 Linear,
10525 LinearRef,
10526 LinearUVal,
10527 LinearVal,
10528 Uniform,
10529 Vector,
10531 /// Attribute set of the parameter.
10532 struct ParamAttrTy {
10533 ParamKindTy Kind = Vector;
10534 llvm::APSInt StrideOrArg;
10535 llvm::APSInt Alignment;
10536 bool HasVarStride = false;
10538 } // namespace
10540 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10541 ArrayRef<ParamAttrTy> ParamAttrs) {
10542 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10543 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10544 // argument of that clause. The VLEN value must be a power of 2.
10545 // Otherwise, the notion of the function's "characteristic data type" (CDT)
10546 // is used to compute the vector length.
10547 // CDT is defined in the following order:
10548 // a) For a non-void function, the CDT is the return type.
10549 // b) If the function has any non-uniform, non-linear parameters, then the
10550 // CDT is the type of the first such parameter.
10551 // c) If the CDT determined by a) or b) above is a struct, union, or class
10552 // type which is passed by value (except for the type that maps to the
10553 // built-in complex data type), the characteristic data type is int.
10554 // d) If none of the above three cases applies, the CDT is int.
10555 // The VLEN is then determined based on the CDT and the size of the vector
10556 // register of the ISA for which the current vector version is generated. The
10557 // VLEN is computed using the formula below:
10558 // VLEN = sizeof(vector_register) / sizeof(CDT),
10559 // where the vector register size is specified in section 3.2.1 "Registers and
10560 // the Stack Frame" of the original AMD64 ABI document.
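// Worked example (illustrative, not from the source): for
//   #pragma omp declare simd notinbranch
//   double dot(double x, double y);  // C linkage, so Fn->getName() == "dot"
// the CDT is double (the return type), so for the AVX2 entry below
// ('d', 256-bit registers) VLEN = 256 / 64 = 4, yielding the variant name
// "_ZGVdN4vv_dot".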
10561 QualType RetType = FD->getReturnType();
10562 if (RetType.isNull())
10563 return 0;
10564 ASTContext &C = FD->getASTContext();
10565 QualType CDT;
10566 if (!RetType.isNull() && !RetType->isVoidType()) {
10567 CDT = RetType;
10568 } else {
10569 unsigned Offset = 0;
10570 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10571 if (ParamAttrs[Offset].Kind == Vector)
10572 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10573 ++Offset;
10575 if (CDT.isNull()) {
10576 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10577 if (ParamAttrs[I + Offset].Kind == Vector) {
10578 CDT = FD->getParamDecl(I)->getType();
10579 break;
10584 if (CDT.isNull())
10585 CDT = C.IntTy;
10586 CDT = CDT->getCanonicalTypeUnqualified();
10587 if (CDT->isRecordType() || CDT->isUnionType())
10588 CDT = C.IntTy;
10589 return C.getTypeSize(CDT);
10592 /// Mangle the parameter part of the vector function name according to
10593 /// their OpenMP classification. The mangling function is defined in
10594 /// section 4.5 of the AAVFABI(2021Q1).
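/// For example (illustrative): parameters classified as (uniform, linear with
/// step 2, vector) mangle their parameter part as "ul2v".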
10595 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10596 SmallString<256> Buffer;
10597 llvm::raw_svector_ostream Out(Buffer);
10598 for (const auto &ParamAttr : ParamAttrs) {
10599 switch (ParamAttr.Kind) {
10600 case Linear:
10601 Out << 'l';
10602 break;
10603 case LinearRef:
10604 Out << 'R';
10605 break;
10606 case LinearUVal:
10607 Out << 'U';
10608 break;
10609 case LinearVal:
10610 Out << 'L';
10611 break;
10612 case Uniform:
10613 Out << 'u';
10614 break;
10615 case Vector:
10616 Out << 'v';
10617 break;
10619 if (ParamAttr.HasVarStride)
10620 Out << "s" << ParamAttr.StrideOrArg;
10621 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10622 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10623 // Don't print the step value if it is not present or if it is
10624 // equal to 1.
10625 if (ParamAttr.StrideOrArg < 0)
10626 Out << 'n' << -ParamAttr.StrideOrArg;
10627 else if (ParamAttr.StrideOrArg != 1)
10628 Out << ParamAttr.StrideOrArg;
10631 if (!!ParamAttr.Alignment)
10632 Out << 'a' << ParamAttr.Alignment;
10635 return std::string(Out.str());
10638 static void
10639 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10640 const llvm::APSInt &VLENVal,
10641 ArrayRef<ParamAttrTy> ParamAttrs,
10642 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10643 struct ISADataTy {
10644 char ISA;
10645 unsigned VecRegSize;
10646 };
10647 ISADataTy ISAData[] = {
10649 {'b', 128}, // SSE
10652 {'c', 256}, // AVX
10655 {'d', 256}, // AVX2
10658 {'e', 512}, // AVX512
10660 };
10661 llvm::SmallVector<char, 2> Masked;
10662 switch (State) {
10663 case OMPDeclareSimdDeclAttr::BS_Undefined:
10664 Masked.push_back('N');
10665 Masked.push_back('M');
10666 break;
10667 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10668 Masked.push_back('N');
10669 break;
10670 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10671 Masked.push_back('M');
10672 break;
10674 for (char Mask : Masked) {
10675 for (const ISADataTy &Data : ISAData) {
10676 SmallString<256> Buffer;
10677 llvm::raw_svector_ostream Out(Buffer);
10678 Out << "_ZGV" << Data.ISA << Mask;
10679 if (!VLENVal) {
10680 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10681 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10682 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10683 } else {
10684 Out << VLENVal;
10686 Out << mangleVectorParameters(ParamAttrs);
10687 Out << '_' << Fn->getName();
10688 Fn->addFnAttr(Out.str());
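// Illustrative result (not from the source): for
//   #pragma omp declare simd simdlen(8)
//   float add(float a, float b);  // C linkage
// the BS_Undefined state adds both unmasked and masked variants per ISA:
//   "_ZGVbN8vv_add", "_ZGVbM8vv_add", "_ZGVcN8vv_add", ..., "_ZGVeM8vv_add".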
10693 // These are the functions needed to mangle the names of the
10694 // vector functions generated by the compiler, according to the rules
10695 // defined in the "Vector Function ABI specifications for AArch64",
10696 // available at
10697 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10699 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10700 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10701 QT = QT.getCanonicalType();
10703 if (QT->isVoidType())
10704 return false;
10706 if (Kind == ParamKindTy::Uniform)
10707 return false;
10709 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10710 return false;
10712 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10713 !QT->isReferenceType())
10714 return false;
10716 return true;
10719 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10720 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10721 QT = QT.getCanonicalType();
10722 unsigned Size = C.getTypeSize(QT);
10724 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10725 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10726 return false;
10728 if (QT->isFloatingType())
10729 return true;
10731 if (QT->isIntegerType())
10732 return true;
10734 if (QT->isPointerType())
10735 return true;
10737 // TODO: Add support for complex types (section 3.1.2, item 2).
10739 return false;
10742 /// Computes the lane size (LS) of a return type or of an input parameter,
10743 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10744 /// TODO: Add support for references, section 3.2.1, item 1.
10745 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10746 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10747 QualType PTy = QT.getCanonicalType()->getPointeeType();
10748 if (getAArch64PBV(PTy, C))
10749 return C.getTypeSize(PTy);
10751 if (getAArch64PBV(QT, C))
10752 return C.getTypeSize(QT);
10754 return C.getTypeSize(C.getUIntPtrType());
10757 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10758 // signature of the scalar function, as defined in 3.2.2 of the
10759 // AAVFABI.
10760 static std::tuple<unsigned, unsigned, bool>
10761 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10762 QualType RetType = FD->getReturnType().getCanonicalType();
10764 ASTContext &C = FD->getASTContext();
10766 bool OutputBecomesInput = false;
10768 llvm::SmallVector<unsigned, 8> Sizes;
10769 if (!RetType->isVoidType()) {
10770 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10771 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10772 OutputBecomesInput = true;
10774 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10775 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10776 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10779 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10780 // The LS of a function parameter / return value can only be a power
10781 // of 2, starting from 8 bits, up to 128.
10782 assert(llvm::all_of(Sizes,
10783 [](unsigned Size) {
10784 return Size == 8 || Size == 16 || Size == 32 ||
10785 Size == 64 || Size == 128;
10786 }) &&
10787 "Invalid size");
10789 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10790 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10791 OutputBecomesInput);
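// Worked example (illustrative): for
//   double scale(float x);  // x classified as vector
// the lane sizes are 64 (return) and 32 (x), so NDS = 32 and WDS = 64.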
10794 // Function used to add the attribute. The parameter `VLEN` is
10795 // templated to allow the use of "x" when targeting scalable functions
10796 // for SVE.
10797 template <typename T>
10798 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10799 char ISA, StringRef ParSeq,
10800 StringRef MangledName, bool OutputBecomesInput,
10801 llvm::Function *Fn) {
10802 SmallString<256> Buffer;
10803 llvm::raw_svector_ostream Out(Buffer);
10804 Out << Prefix << ISA << LMask << VLEN;
10805 if (OutputBecomesInput)
10806 Out << "v";
10807 Out << ParSeq << "_" << MangledName;
10808 Fn->addFnAttr(Out.str());
10811 // Helper function to generate the Advanced SIMD names depending on
10812 // the value of the NDS when simdlen is not present.
10813 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10814 StringRef Prefix, char ISA,
10815 StringRef ParSeq, StringRef MangledName,
10816 bool OutputBecomesInput,
10817 llvm::Function *Fn) {
10818 switch (NDS) {
10819 case 8:
10820 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10821 OutputBecomesInput, Fn);
10822 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10823 OutputBecomesInput, Fn);
10824 break;
10825 case 16:
10826 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10827 OutputBecomesInput, Fn);
10828 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10829 OutputBecomesInput, Fn);
10830 break;
10831 case 32:
10832 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10833 OutputBecomesInput, Fn);
10834 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10835 OutputBecomesInput, Fn);
10836 break;
10837 case 64:
10838 case 128:
10839 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10840 OutputBecomesInput, Fn);
10841 break;
10842 default:
10843 llvm_unreachable("Scalar type is too wide.");
10847 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10848 static void emitAArch64DeclareSimdFunction(
10849 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10850 ArrayRef<ParamAttrTy> ParamAttrs,
10851 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10852 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10854 // Get basic data for building the vector signature.
10855 const auto Data = getNDSWDS(FD, ParamAttrs);
10856 const unsigned NDS = std::get<0>(Data);
10857 const unsigned WDS = std::get<1>(Data);
10858 const bool OutputBecomesInput = std::get<2>(Data);
10860 // Check the values provided via `simdlen` by the user.
10861 // 1. A `simdlen(1)` doesn't produce vector signatures.
10862 if (UserVLEN == 1) {
10863 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10864 DiagnosticsEngine::Warning,
10865 "The clause simdlen(1) has no effect when targeting aarch64.");
10866 CGM.getDiags().Report(SLoc, DiagID);
10867 return;
10870 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10871 // Advanced SIMD output.
10872 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10873 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10874 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10875 "power of 2 when targeting Advanced SIMD.");
10876 CGM.getDiags().Report(SLoc, DiagID);
10877 return;
10880 // 3. Section 3.4.1: SVE fixed length must obey the architectural
10881 // limits.
10882 if (ISA == 's' && UserVLEN != 0) {
10883 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10884 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10885 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10886 "lanes in the architectural constraints "
10887 "for SVE (min is 128-bit, max is "
10888 "2048-bit, by steps of 128-bit)");
10889 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10890 return;
10894 // Sort out parameter sequence.
10895 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10896 StringRef Prefix = "_ZGV";
10897 // Generate simdlen from user input (if any).
10898 if (UserVLEN) {
10899 if (ISA == 's') {
10900 // SVE generates only a masked function.
10901 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10902 OutputBecomesInput, Fn);
10903 } else {
10904 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10905 // Advanced SIMD generates one or two functions, depending on
10906 // the `[not]inbranch` clause.
10907 switch (State) {
10908 case OMPDeclareSimdDeclAttr::BS_Undefined:
10909 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10910 OutputBecomesInput, Fn);
10911 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10912 OutputBecomesInput, Fn);
10913 break;
10914 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10915 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10916 OutputBecomesInput, Fn);
10917 break;
10918 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10919 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10920 OutputBecomesInput, Fn);
10921 break;
10924 } else {
10925 // If no user simdlen is provided, follow the AAVFABI rules for
10926 // generating the vector length.
10927 if (ISA == 's') {
10928 // SVE, section 3.4.1, item 1.
10929 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10930 OutputBecomesInput, Fn);
10931 } else {
10932 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10933 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10934 // two vector names depending on the use of the clause
10935 // `[not]inbranch`.
10936 switch (State) {
10937 case OMPDeclareSimdDeclAttr::BS_Undefined:
10938 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10939 OutputBecomesInput, Fn);
10940 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10941 OutputBecomesInput, Fn);
10942 break;
10943 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10944 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10945 OutputBecomesInput, Fn);
10946 break;
10947 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10948 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10949 OutputBecomesInput, Fn);
10950 break;
10956 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10957 llvm::Function *Fn) {
10958 ASTContext &C = CGM.getContext();
10959 FD = FD->getMostRecentDecl();
10960 while (FD) {
10961 // Map params to their positions in function decl.
10962 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10963 if (isa<CXXMethodDecl>(FD))
10964 ParamPositions.try_emplace(FD, 0);
10965 unsigned ParamPos = ParamPositions.size();
10966 for (const ParmVarDecl *P : FD->parameters()) {
10967 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10968 ++ParamPos;
10970 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10971 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10972 // Mark uniform parameters.
10973 for (const Expr *E : Attr->uniforms()) {
10974 E = E->IgnoreParenImpCasts();
10975 unsigned Pos;
10976 if (isa<CXXThisExpr>(E)) {
10977 Pos = ParamPositions[FD];
10978 } else {
10979 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10980 ->getCanonicalDecl();
10981 auto It = ParamPositions.find(PVD);
10982 assert(It != ParamPositions.end() && "Function parameter not found");
10983 Pos = It->second;
10985 ParamAttrs[Pos].Kind = Uniform;
10987 // Get alignment info.
10988 auto *NI = Attr->alignments_begin();
10989 for (const Expr *E : Attr->aligneds()) {
10990 E = E->IgnoreParenImpCasts();
10991 unsigned Pos;
10992 QualType ParmTy;
10993 if (isa<CXXThisExpr>(E)) {
10994 Pos = ParamPositions[FD];
10995 ParmTy = E->getType();
10996 } else {
10997 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10998 ->getCanonicalDecl();
10999 auto It = ParamPositions.find(PVD);
11000 assert(It != ParamPositions.end() && "Function parameter not found");
11001 Pos = It->second;
11002 ParmTy = PVD->getType();
11004 ParamAttrs[Pos].Alignment =
11005 (*NI)
11006 ? (*NI)->EvaluateKnownConstInt(C)
11007 : llvm::APSInt::getUnsigned(
11008 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11009 .getQuantity());
11010 ++NI;
11012 // Mark linear parameters.
11013 auto *SI = Attr->steps_begin();
11014 auto *MI = Attr->modifiers_begin();
11015 for (const Expr *E : Attr->linears()) {
11016 E = E->IgnoreParenImpCasts();
11017 unsigned Pos;
11018 bool IsReferenceType = false;
11019 // Rescaling factor needed to compute the linear parameter
11020 // value in the mangled name.
11021 unsigned PtrRescalingFactor = 1;
11022 if (isa<CXXThisExpr>(E)) {
11023 Pos = ParamPositions[FD];
11024 auto *P = cast<PointerType>(E->getType());
11025 PtrRescalingFactor = CGM.getContext()
11026 .getTypeSizeInChars(P->getPointeeType())
11027 .getQuantity();
11028 } else {
11029 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11030 ->getCanonicalDecl();
11031 auto It = ParamPositions.find(PVD);
11032 assert(It != ParamPositions.end() && "Function parameter not found");
11033 Pos = It->second;
11034 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11035 PtrRescalingFactor = CGM.getContext()
11036 .getTypeSizeInChars(P->getPointeeType())
11037 .getQuantity();
11038 else if (PVD->getType()->isReferenceType()) {
11039 IsReferenceType = true;
11040 PtrRescalingFactor =
11041 CGM.getContext()
11042 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11043 .getQuantity();
11046 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11047 if (*MI == OMPC_LINEAR_ref)
11048 ParamAttr.Kind = LinearRef;
11049 else if (*MI == OMPC_LINEAR_uval)
11050 ParamAttr.Kind = LinearUVal;
11051 else if (IsReferenceType)
11052 ParamAttr.Kind = LinearVal;
11053 else
11054 ParamAttr.Kind = Linear;
11055 // Assuming a stride of 1, for `linear` without modifiers.
11056 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11057 if (*SI) {
11058 Expr::EvalResult Result;
11059 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11060 if (const auto *DRE =
11061 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11062 if (const auto *StridePVD =
11063 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11064 ParamAttr.HasVarStride = true;
11065 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11066 assert(It != ParamPositions.end() &&
11067 "Function parameter not found");
11068 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11071 } else {
11072 ParamAttr.StrideOrArg = Result.Val.getInt();
11075 // If we are using a linear clause on a pointer, we need to
11076 // rescale the value of linear_step with the byte size of the
11077 // pointee type.
11078 if (!ParamAttr.HasVarStride &&
11079 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11080 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11081 ++SI;
11082 ++MI;
11084 llvm::APSInt VLENVal;
11085 SourceLocation ExprLoc;
11086 const Expr *VLENExpr = Attr->getSimdlen();
11087 if (VLENExpr) {
11088 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11089 ExprLoc = VLENExpr->getExprLoc();
11091 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11092 if (CGM.getTriple().isX86()) {
11093 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11094 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11095 unsigned VLEN = VLENVal.getExtValue();
11096 StringRef MangledName = Fn->getName();
11097 if (CGM.getTarget().hasFeature("sve"))
11098 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11099 MangledName, 's', 128, Fn, ExprLoc);
11100 else if (CGM.getTarget().hasFeature("neon"))
11101 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11102 MangledName, 'n', 128, Fn, ExprLoc);
11105 FD = FD->getPreviousDecl();
11109 namespace {
11110 /// Cleanup action for doacross support.
11111 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11112 public:
11113 static const int DoacrossFinArgs = 2;
11115 private:
11116 llvm::FunctionCallee RTLFn;
11117 llvm::Value *Args[DoacrossFinArgs];
11119 public:
11120 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11121 ArrayRef<llvm::Value *> CallArgs)
11122 : RTLFn(RTLFn) {
11123 assert(CallArgs.size() == DoacrossFinArgs);
11124 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11126 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11127 if (!CGF.HaveInsertPoint())
11128 return;
11129 CGF.EmitRuntimeCall(RTLFn, Args);
11132 } // namespace
11134 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11135 const OMPLoopDirective &D,
11136 ArrayRef<Expr *> NumIterations) {
11137 if (!CGF.HaveInsertPoint())
11138 return;
11140 ASTContext &C = CGM.getContext();
11141 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11142 RecordDecl *RD;
11143 if (KmpDimTy.isNull()) {
11144 // Build struct kmp_dim { // loop bounds info cast to kmp_int64
11145 // kmp_int64 lo; // lower
11146 // kmp_int64 up; // upper
11147 // kmp_int64 st; // stride
11148 // };
11149 RD = C.buildImplicitRecord("kmp_dim");
11150 RD->startDefinition();
11151 addFieldToRecordDecl(C, RD, Int64Ty);
11152 addFieldToRecordDecl(C, RD, Int64Ty);
11153 addFieldToRecordDecl(C, RD, Int64Ty);
11154 RD->completeDefinition();
11155 KmpDimTy = C.getRecordType(RD);
11156 } else {
11157 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11159 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11160 QualType ArrayTy =
11161 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11163 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11164 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11165 enum { LowerFD = 0, UpperFD, StrideFD };
11166 // Fill dims with data.
11167 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11168 LValue DimsLVal = CGF.MakeAddrLValue(
11169 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11170 // dims.upper = num_iterations;
11171 LValue UpperLVal = CGF.EmitLValueForField(
11172 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11173 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11174 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11175 Int64Ty, NumIterations[I]->getExprLoc());
11176 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11177 // dims.stride = 1;
11178 LValue StrideLVal = CGF.EmitLValueForField(
11179 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11180 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11181 StrideLVal);
11184 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11185 // kmp_int32 num_dims, struct kmp_dim * dims);
11186 llvm::Value *Args[] = {
11187 emitUpdateLocation(CGF, D.getBeginLoc()),
11188 getThreadID(CGF, D.getBeginLoc()),
11189 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11190 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11191 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11192 CGM.VoidPtrTy)};
11194 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11195 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11196 CGF.EmitRuntimeCall(RTLFn, Args);
11197 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11198 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11199 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11200 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11201 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11202 llvm::ArrayRef(FiniArgs));
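// Illustrative source (not from this file) that drives the sequence above:
//   #pragma omp for ordered(1)
//   for (int i = 0; i < n; ++i) { ... }
// emits __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, dims), with the
// matching __kmpc_doacross_fini run by the cleanup just pushed.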
11205 template <typename T>
11206 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11207 const T *C, llvm::Value *ULoc,
11208 llvm::Value *ThreadID) {
11209 QualType Int64Ty =
11210 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11211 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11212 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11213 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11214 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11215 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11216 const Expr *CounterVal = C->getLoopData(I);
11217 assert(CounterVal);
11218 llvm::Value *CntVal = CGF.EmitScalarConversion(
11219 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11220 CounterVal->getExprLoc());
11221 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11222 /*Volatile=*/false, Int64Ty);
11224 llvm::Value *Args[] = {
11225 ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11226 llvm::FunctionCallee RTLFn;
11227 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11228 OMPDoacrossKind<T> ODK;
11229 if (ODK.isSource(C)) {
11230 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11231 OMPRTL___kmpc_doacross_post);
11232 } else {
11233 assert(ODK.isSink(C) && "Expect sink modifier.");
11234 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11235 OMPRTL___kmpc_doacross_wait);
11237 CGF.EmitRuntimeCall(RTLFn, Args);
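// Illustrative usage (not from this file) inside such an ordered loop:
//   #pragma omp ordered depend(source)       // -> __kmpc_doacross_post
//   #pragma omp ordered depend(sink: i - 1)  // -> __kmpc_doacross_wait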
11240 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11241 const OMPDependClause *C) {
11242 return EmitDoacrossOrdered<OMPDependClause>(
11243 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11244 getThreadID(CGF, C->getBeginLoc()));
11247 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11248 const OMPDoacrossClause *C) {
11249 return EmitDoacrossOrdered<OMPDoacrossClause>(
11250 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11251 getThreadID(CGF, C->getBeginLoc()));
11254 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11255 llvm::FunctionCallee Callee,
11256 ArrayRef<llvm::Value *> Args) const {
11257 assert(Loc.isValid() && "Outlined function call location must be valid.");
11258 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11260 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11261 if (Fn->doesNotThrow()) {
11262 CGF.EmitNounwindRuntimeCall(Fn, Args);
11263 return;
11266 CGF.EmitRuntimeCall(Callee, Args);
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}
/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}
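/// If \p VD carries an 'omp allocate' attribute, allocate it via the OpenMP
/// allocator runtime (__kmpc_aligned_alloc when an alignment is requested,
/// __kmpc_alloc otherwise) and register a matching __kmpc_free cleanup;
/// otherwise return the address recorded for untied tasks, if any.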
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
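/// Check whether \p VD is registered as a local variable of an untied task in
/// the current function.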
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}
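/// Push the set of variables named in 'nontemporal' clauses of the loop
/// directive, if any, for the lifetime of this RAII object.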
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}
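/// Collect the variables for which lastprivate conditional tracking must be
/// disabled inside \p S: captures of target/task regions and scalar variables
/// named in private, firstprivate, lastprivate, reduction, or linear clauses
/// that are already tracked by a non-disabled enclosing region.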
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
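/// Push lastprivate conditional tracking data for \p S when compiling for
/// OpenMP 5.0 or later and the directive carries a
/// 'lastprivate(conditional:)' clause.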
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
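/// Create (or reuse) the private copy of a lastprivate conditional variable:
/// an implicit record holding the value and a 'Fired' flag, with the flag
/// zero-initialized. Returns the address of the value field.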
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
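/// Emit the 'if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }' update of
/// the global copy of the lastprivate conditional variable, guarded by a
/// critical region unless compiling in simd-only mode.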
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
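/// If \p LHS refers to a tracked lastprivate conditional variable, emit the
/// update of its global copy, or, for references from inner parallel regions,
/// atomically set the 'Fired' flag of the outer private copy instead.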
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
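/// For each lastprivate conditional variable captured by \p D and not listed
/// in \p IgnoredDecls, emit a check of the 'Fired' flag and update the global
/// copy if the flag was set.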
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
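/// Store the final value accumulated in the global copy back into the private
/// copy of the lastprivate conditional variable, if the global copy exists.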
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
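// CGOpenMPSIMDRuntime is used in simd-only mode (-fopenmp-simd), where no
// OpenMP runtime library calls are emitted; every entry point below that
// would require the runtime is therefore unreachable.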
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}