//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };

  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to look up in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}

/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
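// As an illustration of how these flags compose: the barrier implied at the
// end of a worksharing 'for' is typically tagged OMP_IDENT_BARRIER_IMPL_FOR
// (note it aliases OMP_IDENT_BARRIER_IMPL), which lets the runtime tell an
// implicit barrier apart from an explicit '#pragma omp barrier'
// (OMP_IDENT_BARRIER_EXPL).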
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
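// For illustration: a directive at line 4, column 9 of "test.c" inside main()
// yields a psource string of the form ";test.c;main;4;9;;" (this is exactly
// the format produced by getIdentStringFromSourceLocation below).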
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
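// For example, 'schedule(dynamic, 4)' selects OMP_sch_dynamic_chunked (the
// chunk size itself is passed to the runtime separately), adding an 'ordered'
// clause shifts the choice into the OMP_ord_* range, and a
// 'schedule(nonmonotonic: dynamic)' modifier is encoded by OR-ing in
// OMP_sch_modifier_nonmonotonic.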
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
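// The generated control flow is, roughly:
//
//   %isempty = icmp eq %dest.begin, %dest.end
//   br %isempty, label %done, label %body
// body:
//   ; PHI over the current destination (and source, for UDR inits) element
//   <emit Init into the current element>
//   %next = getelementptr %cur, 1
//   %finished = icmp eq %next, %dest.end
//   br %finished, label %done, label %body
// done: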
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
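// For example, with BaseTy 'T **' and ElTy 'T', castToBase materializes one
// temporary per pointer level, chains them together with stores, writes the
// adjusted pointer Addr into the innermost temporary, and returns the
// outermost one, so a chain of loads through the returned address reproduces
// the original pointer shape but lands on the private copy.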
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
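// As a sketch (hypothetical declaration, not taken from any test), a
// user-defined reduction such as
//
//   #pragma omp declare reduction(merge : MyTy : omp_out.append(omp_in)) \
//       initializer(omp_priv = MyTy())
//
// produces a '.omp_combiner.' whose body evaluates the combiner expression
// with omp_in/omp_out privatized to the two pointer parameters, and a
// '.omp_initializer.' that initializes omp_priv (reading omp_orig through the
// second parameter when the initializer mentions it).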
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
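// Note that when CGF.CurFuncDecl is not a FunctionDecl the function field is
// left empty, e.g. ";file.c;;10;1;;" for a construct at file.c:10:1.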
1361 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1362 SourceLocation Loc,
1363 unsigned Flags, bool EmitLoc) {
1364 uint32_t SrcLocStrSize;
1365 llvm::Constant *SrcLocStr;
1366 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1367 llvm::codegenoptions::NoDebugInfo) ||
1368 Loc.isInvalid()) {
1369 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1370 } else {
1371 std::string FunctionName;
1372 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1373 FunctionName = FD->getQualifiedNameAsString();
1374 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1375 const char *FileName = PLoc.getFilename();
1376 unsigned Line = PLoc.getLine();
1377 unsigned Column = PLoc.getColumn();
1378 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1379 Column, SrcLocStrSize);
1381 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1382 return OMPBuilder.getOrCreateIdent(
1383 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1386 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1387 SourceLocation Loc) {
1388 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1389 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1390 // the clang invariants used below might be broken.
1391 if (CGM.getLangOpts().OpenMPIRBuilder) {
1392 SmallString<128> Buffer;
1393 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1394 uint32_t SrcLocStrSize;
1395 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1396 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1397 return OMPBuilder.getOrCreateThreadID(
1398 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1401 llvm::Value *ThreadID = nullptr;
1402 // Check whether we've already cached a load of the thread id in this
1403 // function.
1404 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1405 if (I != OpenMPLocThreadIDMap.end()) {
1406 ThreadID = I->second.ThreadID;
1407 if (ThreadID != nullptr)
1408 return ThreadID;
1410 // If exceptions are enabled, do not use parameter to avoid possible crash.
1411 if (auto *OMPRegionInfo =
1412 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1413 if (OMPRegionInfo->getThreadIDVariable()) {
1414 // Check if this an outlined function with thread id passed as argument.
1415 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1416 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1417 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1418 !CGF.getLangOpts().CXXExceptions ||
1419 CGF.Builder.GetInsertBlock() == TopBlock ||
1420 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1421 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1422 TopBlock ||
1423 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1424 CGF.Builder.GetInsertBlock()) {
1425 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1426 // If value loaded in entry block, cache it and use it everywhere in
1427 // function.
1428 if (CGF.Builder.GetInsertBlock() == TopBlock)
1429 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1430 return ThreadID;
1435 // This is not an outlined function region - need to call __kmpc_int32
1436 // kmpc_global_thread_num(ident_t *loc).
1437 // Generate thread id value and cache this value for use across the
1438 // function.
1439 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1440 if (!Elem.ServiceInsertPt)
1441 setLocThreadIdInsertPt(CGF);
1442 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1443 CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1444 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
1445 llvm::CallInst *Call = CGF.Builder.CreateCall(
1446 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1447 OMPRTL___kmpc_global_thread_num),
1448 emitUpdateLocation(CGF, Loc));
1449 Call->setCallingConv(CGF.getRuntimeCC());
1450 Elem.ThreadID = Call;
1451 return Call;
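// Sketch of the resulting IR for a plain (non-outlined) function, assuming
// opaque pointers: the first use emits a single call at the service insertion
// point near the entry block, roughly
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
// and the cached %gtid is reused by all later runtime calls in the function.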
1454 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1455 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1456 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1457 clearLocThreadIdInsertPt(CGF);
1458 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1460 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1461 for (const auto *D : FunctionUDRMap[CGF.CurFn])
1462 UDRMap.erase(D);
1463 FunctionUDRMap.erase(CGF.CurFn);
1465 auto I = FunctionUDMMap.find(CGF.CurFn);
1466 if (I != FunctionUDMMap.end()) {
1467 for (const auto *D : I->second)
1468 UDMMap.erase(D);
1469 FunctionUDMMap.erase(I);
1471 LastprivateConditionalToTypes.erase(CGF.CurFn);
1472 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1475 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1476 return OMPBuilder.IdentPtr;
1479 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1480 if (!Kmpc_MicroTy) {
1481 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1482 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1483 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1484 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1486 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1489 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1490 convertDeviceClause(const VarDecl *VD) {
1491 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1492 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1493 if (!DevTy)
1494 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1496 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1497 case OMPDeclareTargetDeclAttr::DT_Host:
1498 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1500 case OMPDeclareTargetDeclAttr::DT_NoHost:
1501 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1503 case OMPDeclareTargetDeclAttr::DT_Any:
1504 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1506 default:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1512 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1513 convertCaptureClause(const VarDecl *VD) {
1514 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1515 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1516 if (!MapType)
1517 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1518 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1519 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1520 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1522 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1523 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1525 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1528 default:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1534 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1535 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1536 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1538 auto FileInfoCallBack = [&]() {
1539 SourceManager &SM = CGM.getContext().getSourceManager();
1540 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1542 llvm::sys::fs::UniqueID ID;
1543 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1544 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1547 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1550 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1553 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1554 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1556 auto LinkageForVariable = [&VD, this]() {
1557 return CGM.getLLVMLinkageVarDefinition(VD);
1560 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1562 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1563 CGM.getContext().getPointerType(VD->getType()));
1564 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1565 convertCaptureClause(VD), convertDeviceClause(VD),
1566 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1567 VD->isExternallyVisible(),
1568 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1569 VD->getCanonicalDecl()->getBeginLoc()),
1570 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1571 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1572 LinkageForVariable);
1574 if (!addr)
1575 return ConstantAddress::invalid();
1576 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1579 llvm::Constant *
1580 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1581 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1582 !CGM.getContext().getTargetInfo().isTLSSupported());
1583 // Lookup the entry, lazily creating it if necessary.
1584 std::string Suffix = getName({"cache", ""});
1585 return OMPBuilder.getOrCreateInternalVariable(
1586 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1589 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1590 const VarDecl *VD,
1591 Address VDAddr,
1592 SourceLocation Loc) {
1593 if (CGM.getLangOpts().OpenMPUseTLS &&
1594 CGM.getContext().getTargetInfo().isTLSSupported())
1595 return VDAddr;
1597 llvm::Type *VarTy = VDAddr.getElementType();
1598 llvm::Value *Args[] = {
1599 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1600 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1601 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1602 getOrCreateThreadPrivateCache(VD)};
1603 return Address(
1604 CGF.EmitRuntimeCall(
1605 OMPBuilder.getOrCreateRuntimeFunction(
1606 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1607 Args),
1608 CGF.Int8Ty, VDAddr.getAlignment());
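// Example (a sketch of the non-TLS case): a use of
//   int x;
//   #pragma omp threadprivate(x)
// is redirected through the runtime, roughly as
//   void *p = __kmpc_threadprivate_cached(&loc, gtid, &x, sizeof(x), &cache);
// where 'cache' stands for the internal "<mangled-x>.cache." global created
// above, so each thread accesses its own copy at the returned address.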
1611 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1612 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1613 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1614 // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
1615 // runtime library.
1616 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1617 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1618 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1619 OMPLoc);
1620 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1621 // to register constructor/destructor for variable.
1622 llvm::Value *Args[] = {
1623 OMPLoc,
1624 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1625 Ctor, CopyCtor, Dtor};
1626 CGF.EmitRuntimeCall(
1627 OMPBuilder.getOrCreateRuntimeFunction(
1628 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1629 Args);
1632 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1633 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1634 bool PerformInit, CodeGenFunction *CGF) {
1635 if (CGM.getLangOpts().OpenMPUseTLS &&
1636 CGM.getContext().getTargetInfo().isTLSSupported())
1637 return nullptr;
1639 VD = VD->getDefinition(CGM.getContext());
1640 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1641 QualType ASTTy = VD->getType();
1643 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1644 const Expr *Init = VD->getAnyInitializer();
1645 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1646 // Generate function that re-emits the declaration's initializer into the
1647 // threadprivate copy of the variable VD
1648 CodeGenFunction CtorCGF(CGM);
1649 FunctionArgList Args;
1650 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1651 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1652 ImplicitParamKind::Other);
1653 Args.push_back(&Dst);
1655 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1656 CGM.getContext().VoidPtrTy, Args);
1657 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1658 std::string Name = getName({"__kmpc_global_ctor_", ""});
1659 llvm::Function *Fn =
1660 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1661 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1662 Args, Loc, Loc);
1663 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1664 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1665 CGM.getContext().VoidPtrTy, Dst.getLocation());
1666 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1667 VDAddr.getAlignment());
1668 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1669 /*IsInitializer=*/true);
1670 ArgVal = CtorCGF.EmitLoadOfScalar(
1671 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1672 CGM.getContext().VoidPtrTy, Dst.getLocation());
1673 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1674 CtorCGF.FinishFunction();
1675 Ctor = Fn;
1677 if (VD->getType().isDestructedType() != QualType::DK_none) {
1678 // Generate function that emits destructor call for the threadprivate copy
1679 // of the variable VD
1680 CodeGenFunction DtorCGF(CGM);
1681 FunctionArgList Args;
1682 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1683 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1684 ImplicitParamKind::Other);
1685 Args.push_back(&Dst);
1687 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1688 CGM.getContext().VoidTy, Args);
1689 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1690 std::string Name = getName({"__kmpc_global_dtor_", ""});
1691 llvm::Function *Fn =
1692 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1693 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1694 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1695 Loc, Loc);
1696 // Create a scope with an artificial location for the body of this function.
1697 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1698 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1699 DtorCGF.GetAddrOfLocalVar(&Dst),
1700 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1701 DtorCGF.emitDestroy(
1702 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1703 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1704 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1705 DtorCGF.FinishFunction();
1706 Dtor = Fn;
1708 // Do not emit init function if it is not required.
1709 if (!Ctor && !Dtor)
1710 return nullptr;
1712 // Copying constructor for the threadprivate variable.
1713 // Must be NULL: it is reserved by the runtime, which currently requires this
1714 // parameter to always be NULL; otherwise it fires an assertion.
1715 CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1716 if (Ctor == nullptr) {
1717 Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1719 if (Dtor == nullptr) {
1720 Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1722 if (!CGF) {
1723 auto *InitFunctionTy =
1724 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1725 std::string Name = getName({"__omp_threadprivate_init_", ""});
1726 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1727 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1728 CodeGenFunction InitCGF(CGM);
1729 FunctionArgList ArgList;
1730 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1731 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1732 Loc, Loc);
1733 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1734 InitCGF.FinishFunction();
1735 return InitFunction;
1737 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1739 return nullptr;
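// Sketch of the C++ path above: for
//   struct S { S(); ~S(); };
//   S s;
//   #pragma omp threadprivate(s)
// the emitted __omp_threadprivate_init_ helper first calls
// __kmpc_global_thread_num to initialize the runtime and then, roughly,
//   __kmpc_threadprivate_register(&loc, &s, ctor, /*cctor=*/NULL, dtor);
// so every thread's copy is constructed and destroyed via the generated
// __kmpc_global_ctor_/__kmpc_global_dtor_ helpers.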
1742 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1743 llvm::GlobalValue *GV) {
1744 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1745 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1747 // We only need to handle active 'indirect' declare target functions.
1748 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1749 return;
1751 // Get a mangled name to store the new device global in.
1752 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1753 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1754 SmallString<128> Name;
1755 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1757 // We need to generate a new global to hold the address of the indirectly
1758 // called device function. Doing this allows us to keep the visibility and
1759 // linkage of the associated function unchanged while allowing the runtime to
1760 // access its value.
1761 llvm::GlobalValue *Addr = GV;
1762 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1763 Addr = new llvm::GlobalVariable(
1764 CGM.getModule(), CGM.VoidPtrTy,
1765 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1766 nullptr, llvm::GlobalValue::NotThreadLocal,
1767 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1768 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1771 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1772 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1773 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1774 llvm::GlobalValue::WeakODRLinkage);
1777 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1778 QualType VarType,
1779 StringRef Name) {
1780 std::string Suffix = getName({"artificial", ""});
1781 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1782 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1783 VarLVType, Twine(Name).concat(Suffix).str());
1784 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1785 CGM.getTarget().isTLSSupported()) {
1786 GAddr->setThreadLocal(/*Val=*/true);
1787 return Address(GAddr, GAddr->getValueType(),
1788 CGM.getContext().getTypeAlignInChars(VarType));
1790 std::string CacheSuffix = getName({"cache", ""});
1791 llvm::Value *Args[] = {
1792 emitUpdateLocation(CGF, SourceLocation()),
1793 getThreadID(CGF, SourceLocation()),
1794 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1795 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1796 /*isSigned=*/false),
1797 OMPBuilder.getOrCreateInternalVariable(
1798 CGM.VoidPtrPtrTy,
1799 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1800 return Address(
1801 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1802 CGF.EmitRuntimeCall(
1803 OMPBuilder.getOrCreateRuntimeFunction(
1804 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1805 Args),
1806 CGF.Builder.getPtrTy(0)),
1807 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1810 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1811 const RegionCodeGenTy &ThenGen,
1812 const RegionCodeGenTy &ElseGen) {
1813 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1815 // If the condition constant folds and can be elided, try to avoid emitting
1816 // the condition and the dead arm of the if/else.
1817 bool CondConstant;
1818 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1819 if (CondConstant)
1820 ThenGen(CGF);
1821 else
1822 ElseGen(CGF);
1823 return;
1826 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1827 // emit the conditional branch.
1828 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1829 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1830 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1831 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1833 // Emit the 'then' code.
1834 CGF.EmitBlock(ThenBlock);
1835 ThenGen(CGF);
1836 CGF.EmitBranch(ContBlock);
1837 // Emit the 'else' code if present.
1838 // There is no need to emit line number for unconditional branch.
1839 (void)ApplyDebugLocation::CreateEmpty(CGF);
1840 CGF.EmitBlock(ElseBlock);
1841 ElseGen(CGF);
1842 // There is no need to emit line number for unconditional branch.
1843 (void)ApplyDebugLocation::CreateEmpty(CGF);
1844 CGF.EmitBranch(ContBlock);
1845 // Emit the continuation block for code after the if.
1846 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
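// Shape of the emitted CFG when the condition does not fold (sketch):
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:  <ThenGen>; br label %omp_if.end
// omp_if.else:  <ElseGen>; br label %omp_if.end
// omp_if.end:   <code after the construct>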
1849 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1850 llvm::Function *OutlinedFn,
1851 ArrayRef<llvm::Value *> CapturedVars,
1852 const Expr *IfCond,
1853 llvm::Value *NumThreads) {
1854 if (!CGF.HaveInsertPoint())
1855 return;
1856 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1857 auto &M = CGM.getModule();
1858 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1859 this](CodeGenFunction &CGF, PrePostActionTy &) {
1860 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1861 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1862 llvm::Value *Args[] = {
1863 RTLoc,
1864 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1865 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1866 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1867 RealArgs.append(std::begin(Args), std::end(Args));
1868 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1870 llvm::FunctionCallee RTLFn =
1871 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1872 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1874 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1875 this](CodeGenFunction &CGF, PrePostActionTy &) {
1876 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1877 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1878 // Build calls:
1879 // __kmpc_serialized_parallel(&Loc, GTid);
1880 llvm::Value *Args[] = {RTLoc, ThreadID};
1881 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1882 M, OMPRTL___kmpc_serialized_parallel),
1883 Args);
1885 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1886 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1887 RawAddress ZeroAddrBound =
1888 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1889 /*Name=*/".bound.zero.addr");
1890 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1891 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1892 // ThreadId for serialized parallels is 0.
1893 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1894 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1895 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1897 // Ensure we do not inline the function. This is trivially true for the ones
1898 // passed to __kmpc_fork_call but the ones called in serialized regions
1899 // could be inlined. This is not perfect, but it is closer to the invariant
1900 // we want, namely, every data environment starts with a new function.
1901 // TODO: We should pass the if condition to the runtime function and do the
1902 // handling there. Much cleaner code.
1903 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1904 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1905 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1907 // __kmpc_end_serialized_parallel(&Loc, GTid);
1908 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1910 M, OMPRTL___kmpc_end_serialized_parallel),
1911 EndArgs);
1913 if (IfCond) {
1914 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1915 } else {
1916 RegionCodeGenTy ThenRCG(ThenGen);
1917 ThenRCG(CGF);
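// For illustration, a directive such as
//   #pragma omp parallel if(cond)
// takes the ThenGen path when 'cond' is true, i.e. roughly
//   __kmpc_fork_call(&loc, <n>, microtask, var1, ..., varn);
// and the ElseGen path otherwise, where the region is serialized between
// __kmpc_serialized_parallel/__kmpc_end_serialized_parallel and the outlined
// function is called directly with a zero bound thread id.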
1921 // If we're inside an (outlined) parallel region, use the region info's
1922 // thread-ID variable (it is passed as the first argument of the outlined
1923 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1924 // region but in a regular serial code region, get the thread ID by calling
1925 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary,
1926 // and return the address of that temporary.
1927 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1928 SourceLocation Loc) {
1929 if (auto *OMPRegionInfo =
1930 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1931 if (OMPRegionInfo->getThreadIDVariable())
1932 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1934 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1935 QualType Int32Ty =
1936 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1937 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1938 CGF.EmitStoreOfScalar(ThreadID,
1939 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1941 return ThreadIDTemp;
1944 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1945 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1946 std::string Name = getName({Prefix, "var"});
1947 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1950 namespace {
1951 /// Common pre(post)-action for different OpenMP constructs.
1952 class CommonActionTy final : public PrePostActionTy {
1953 llvm::FunctionCallee EnterCallee;
1954 ArrayRef<llvm::Value *> EnterArgs;
1955 llvm::FunctionCallee ExitCallee;
1956 ArrayRef<llvm::Value *> ExitArgs;
1957 bool Conditional;
1958 llvm::BasicBlock *ContBlock = nullptr;
1960 public:
1961 CommonActionTy(llvm::FunctionCallee EnterCallee,
1962 ArrayRef<llvm::Value *> EnterArgs,
1963 llvm::FunctionCallee ExitCallee,
1964 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1965 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1966 ExitArgs(ExitArgs), Conditional(Conditional) {}
1967 void Enter(CodeGenFunction &CGF) override {
1968 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1969 if (Conditional) {
1970 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1971 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1972 ContBlock = CGF.createBasicBlock("omp_if.end");
1973 // Generate the branch (If-stmt)
1974 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1975 CGF.EmitBlock(ThenBlock);
1978 void Done(CodeGenFunction &CGF) {
1979 // Emit the rest of blocks/branches
1980 CGF.EmitBranch(ContBlock);
1981 CGF.EmitBlock(ContBlock, true);
1983 void Exit(CodeGenFunction &CGF) override {
1984 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1987 } // anonymous namespace
1989 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1990 StringRef CriticalName,
1991 const RegionCodeGenTy &CriticalOpGen,
1992 SourceLocation Loc, const Expr *Hint) {
1993 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1994 // CriticalOpGen();
1995 // __kmpc_end_critical(ident_t *, gtid, Lock);
1996 // Prepare arguments and build a call to __kmpc_critical
1997 if (!CGF.HaveInsertPoint())
1998 return;
1999 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2000 getCriticalRegionLock(CriticalName)};
2001 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2002 std::end(Args));
2003 if (Hint) {
2004 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2005 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2007 CommonActionTy Action(
2008 OMPBuilder.getOrCreateRuntimeFunction(
2009 CGM.getModule(),
2010 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2011 EnterArgs,
2012 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2013 OMPRTL___kmpc_end_critical),
2014 Args);
2015 CriticalOpGen.setAction(Action);
2016 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
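// Example of the mapping above (a sketch): a construct like
//   #pragma omp critical(name) hint(omp_sync_hint_contended)
//   { body(); }
// is emitted roughly as
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_name.var, hint);
//   body();
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_name.var);
// with plain __kmpc_critical used when no hint clause is present.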
2019 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2020 const RegionCodeGenTy &MasterOpGen,
2021 SourceLocation Loc) {
2022 if (!CGF.HaveInsertPoint())
2023 return;
2024 // if(__kmpc_master(ident_t *, gtid)) {
2025 // MasterOpGen();
2026 // __kmpc_end_master(ident_t *, gtid);
2027 // }
2028 // Prepare arguments and build a call to __kmpc_master
2029 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2030 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2031 CGM.getModule(), OMPRTL___kmpc_master),
2032 Args,
2033 OMPBuilder.getOrCreateRuntimeFunction(
2034 CGM.getModule(), OMPRTL___kmpc_end_master),
2035 Args,
2036 /*Conditional=*/true);
2037 MasterOpGen.setAction(Action);
2038 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2039 Action.Done(CGF);
2042 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2043 const RegionCodeGenTy &MaskedOpGen,
2044 SourceLocation Loc, const Expr *Filter) {
2045 if (!CGF.HaveInsertPoint())
2046 return;
2047 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2048 // MaskedOpGen();
2049 // __kmpc_end_masked(ident_t *, gtid);
2050 // }
2051 // Prepare arguments and build a call to __kmpc_masked
2052 llvm::Value *FilterVal = Filter
2053 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2054 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2055 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2056 FilterVal};
2057 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2058 getThreadID(CGF, Loc)};
2059 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2060 CGM.getModule(), OMPRTL___kmpc_masked),
2061 Args,
2062 OMPBuilder.getOrCreateRuntimeFunction(
2063 CGM.getModule(), OMPRTL___kmpc_end_masked),
2064 ArgsEnd,
2065 /*Conditional=*/true);
2066 MaskedOpGen.setAction(Action);
2067 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2068 Action.Done(CGF);
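// Sketch of the masked lowering: for
//   #pragma omp masked filter(2)
//   { body(); }
// this emits the conditional pattern from the comment above, roughly
//   if (__kmpc_masked(&loc, gtid, 2)) {
//     body();
//     __kmpc_end_masked(&loc, gtid);
//   }
// with the filter defaulting to thread 0 when the clause is absent.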
2071 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2072 SourceLocation Loc) {
2073 if (!CGF.HaveInsertPoint())
2074 return;
2075 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2076 OMPBuilder.createTaskyield(CGF.Builder);
2077 } else {
2078 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2079 llvm::Value *Args[] = {
2080 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2081 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2082 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2083 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2084 Args);
2087 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2088 Region->emitUntiedSwitch(CGF);
2091 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2092 const RegionCodeGenTy &TaskgroupOpGen,
2093 SourceLocation Loc) {
2094 if (!CGF.HaveInsertPoint())
2095 return;
2096 // __kmpc_taskgroup(ident_t *, gtid);
2097 // TaskgroupOpGen();
2098 // __kmpc_end_taskgroup(ident_t *, gtid);
2099 // Prepare arguments and build a call to __kmpc_taskgroup
2100 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2101 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2102 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2103 Args,
2104 OMPBuilder.getOrCreateRuntimeFunction(
2105 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2106 Args);
2107 TaskgroupOpGen.setAction(Action);
2108 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2111 /// Given an array of pointers to variables, project the address of a
2112 /// given variable.
2113 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2114 unsigned Index, const VarDecl *Var) {
2115 // Pull out the pointer to the variable.
2116 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2117 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2119 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2120 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2123 static llvm::Value *emitCopyprivateCopyFunction(
2124 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2125 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2126 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2127 SourceLocation Loc) {
2128 ASTContext &C = CGM.getContext();
2129 // void copy_func(void *LHSArg, void *RHSArg);
2130 FunctionArgList Args;
2131 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2132 ImplicitParamKind::Other);
2133 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2134 ImplicitParamKind::Other);
2135 Args.push_back(&LHSArg);
2136 Args.push_back(&RHSArg);
2137 const auto &CGFI =
2138 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2139 std::string Name =
2140 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2141 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2142 llvm::GlobalValue::InternalLinkage, Name,
2143 &CGM.getModule());
2144 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2145 Fn->setDoesNotRecurse();
2146 CodeGenFunction CGF(CGM);
2147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2148 // Dest = (void*[n])(LHSArg);
2149 // Src = (void*[n])(RHSArg);
2150 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2151 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2152 CGF.Builder.getPtrTy(0)),
2153 ArgsElemType, CGF.getPointerAlign());
2154 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2155 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2156 CGF.Builder.getPtrTy(0)),
2157 ArgsElemType, CGF.getPointerAlign());
2158 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2159 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2160 // ...
2161 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2162 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2163 const auto *DestVar =
2164 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2165 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2167 const auto *SrcVar =
2168 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2169 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2171 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2172 QualType Type = VD->getType();
2173 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2175 CGF.FinishFunction();
2176 return Fn;
2179 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2180 const RegionCodeGenTy &SingleOpGen,
2181 SourceLocation Loc,
2182 ArrayRef<const Expr *> CopyprivateVars,
2183 ArrayRef<const Expr *> SrcExprs,
2184 ArrayRef<const Expr *> DstExprs,
2185 ArrayRef<const Expr *> AssignmentOps) {
2186 if (!CGF.HaveInsertPoint())
2187 return;
2188 assert(CopyprivateVars.size() == SrcExprs.size() &&
2189 CopyprivateVars.size() == DstExprs.size() &&
2190 CopyprivateVars.size() == AssignmentOps.size());
2191 ASTContext &C = CGM.getContext();
2192 // int32 did_it = 0;
2193 // if(__kmpc_single(ident_t *, gtid)) {
2194 // SingleOpGen();
2195 // __kmpc_end_single(ident_t *, gtid);
2196 // did_it = 1;
2197 // }
2198 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2199 // <copy_func>, did_it);
2201 Address DidIt = Address::invalid();
2202 if (!CopyprivateVars.empty()) {
2203 // int32 did_it = 0;
2204 QualType KmpInt32Ty =
2205 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2206 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2207 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2209 // Prepare arguments and build a call to __kmpc_single
2210 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2211 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2212 CGM.getModule(), OMPRTL___kmpc_single),
2213 Args,
2214 OMPBuilder.getOrCreateRuntimeFunction(
2215 CGM.getModule(), OMPRTL___kmpc_end_single),
2216 Args,
2217 /*Conditional=*/true);
2218 SingleOpGen.setAction(Action);
2219 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2220 if (DidIt.isValid()) {
2221 // did_it = 1;
2222 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2224 Action.Done(CGF);
2225 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2226 // <copy_func>, did_it);
2227 if (DidIt.isValid()) {
2228 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2229 QualType CopyprivateArrayTy = C.getConstantArrayType(
2230 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2231 /*IndexTypeQuals=*/0);
2232 // Create a list of all private variables for copyprivate.
2233 Address CopyprivateList =
2234 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2235 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2236 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2237 CGF.Builder.CreateStore(
2238 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2239 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2240 CGF.VoidPtrTy),
2241 Elem);
2243 // Build a function that copies private values from the single region to all
2244 // other threads in the corresponding parallel region.
2245 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2246 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2247 SrcExprs, DstExprs, AssignmentOps, Loc);
2248 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2249 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2250 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2251 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2252 llvm::Value *Args[] = {
2253 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2254 getThreadID(CGF, Loc), // i32 <gtid>
2255 BufSize, // size_t <buf_size>
2256 CL.emitRawPointer(CGF), // void *<copyprivate list>
2257 CpyFn, // void (*) (void *, void *) <copy_func>
2258 DidItVal // i32 did_it
2260 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2261 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2262 Args);
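// Worked example (a sketch):
//   #pragma omp single copyprivate(a, b)
//   { a = ...; b = ...; }
// follows the pattern in the comment above: the winning thread runs the body
// between __kmpc_single/__kmpc_end_single and sets did_it to 1; afterwards
// every thread calls, roughly,
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, &cpr_list, copy_func, did_it);
// and copy_func broadcasts each listed variable from the single thread's copy.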
2266 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2267 const RegionCodeGenTy &OrderedOpGen,
2268 SourceLocation Loc, bool IsThreads) {
2269 if (!CGF.HaveInsertPoint())
2270 return;
2271 // __kmpc_ordered(ident_t *, gtid);
2272 // OrderedOpGen();
2273 // __kmpc_end_ordered(ident_t *, gtid);
2274 // Prepare arguments and build a call to __kmpc_ordered
2275 if (IsThreads) {
2276 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2277 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2278 CGM.getModule(), OMPRTL___kmpc_ordered),
2279 Args,
2280 OMPBuilder.getOrCreateRuntimeFunction(
2281 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2282 Args);
2283 OrderedOpGen.setAction(Action);
2284 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2285 return;
2287 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2290 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2291 unsigned Flags;
2292 if (Kind == OMPD_for)
2293 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2294 else if (Kind == OMPD_sections)
2295 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2296 else if (Kind == OMPD_single)
2297 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2298 else if (Kind == OMPD_barrier)
2299 Flags = OMP_IDENT_BARRIER_EXPL;
2300 else
2301 Flags = OMP_IDENT_BARRIER_IMPL;
2302 return Flags;
2305 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2306 CodeGenFunction &CGF, const OMPLoopDirective &S,
2307 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2308 // Check if the loop directive is actually a doacross loop directive. In this
2309 // case, choose the 'static, 1' schedule.
2310 if (llvm::any_of(
2311 S.getClausesOfKind<OMPOrderedClause>(),
2312 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2313 ScheduleKind = OMPC_SCHEDULE_static;
2314 // Chunk size is 1 in this case.
2315 llvm::APInt ChunkSize(32, 1);
2316 ChunkExpr = IntegerLiteral::Create(
2317 CGF.getContext(), ChunkSize,
2318 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2319 SourceLocation());
2323 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2324 OpenMPDirectiveKind Kind, bool EmitChecks,
2325 bool ForceSimpleCall) {
2326 // Check if we should use the OMPBuilder
2327 auto *OMPRegionInfo =
2328 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2329 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
2331 OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2332 EmitChecks);
2333 assert(AfterIP && "unexpected error creating barrier");
2334 CGF.Builder.restoreIP(*AfterIP);
2335 return;
2338 if (!CGF.HaveInsertPoint())
2339 return;
2342 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2343 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2344 // thread_id);
2345 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2346 getThreadID(CGF, Loc)};
2347 if (OMPRegionInfo) {
2348 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2349 llvm::Value *Result = CGF.EmitRuntimeCall(
2350 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2351 OMPRTL___kmpc_cancel_barrier),
2352 Args);
2353 if (EmitChecks) {
2354 // if (__kmpc_cancel_barrier()) {
2355 // exit from construct;
2356 // }
2357 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2358 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2359 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2360 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2361 CGF.EmitBlock(ExitBB);
2362 // exit from construct;
2363 CodeGenFunction::JumpDest CancelDestination =
2364 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2365 CGF.EmitBranchThroughCleanup(CancelDestination);
2366 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2368 return;
2371 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2372 CGM.getModule(), OMPRTL___kmpc_barrier),
2373 Args);
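// Sketch of the cancellable case: inside a parallel region containing
// '#pragma omp cancel parallel', an implicit barrier becomes roughly
//   %r = call i32 @__kmpc_cancel_barrier(ptr @loc, i32 %gtid)
// followed, when EmitChecks is set, by a branch to the cancellation exit if
// %r is non-zero; regions without cancellation use @__kmpc_barrier instead.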
2376 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2377 Expr *ME, bool IsFatal) {
2378 llvm::Value *MVL =
2379 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2380 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2381 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2382 // *message)
2383 llvm::Value *Args[] = {
2384 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2385 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2386 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2387 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2388 CGM.getModule(), OMPRTL___kmpc_error),
2389 Args);
2392 /// Map the OpenMP loop schedule to the runtime enumeration.
2393 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2394 bool Chunked, bool Ordered) {
2395 switch (ScheduleKind) {
2396 case OMPC_SCHEDULE_static:
2397 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2398 : (Ordered ? OMP_ord_static : OMP_sch_static);
2399 case OMPC_SCHEDULE_dynamic:
2400 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2401 case OMPC_SCHEDULE_guided:
2402 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2403 case OMPC_SCHEDULE_runtime:
2404 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2405 case OMPC_SCHEDULE_auto:
2406 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2407 case OMPC_SCHEDULE_unknown:
2408 assert(!Chunked && "chunk was specified but schedule kind not known");
2409 return Ordered ? OMP_ord_static : OMP_sch_static;
2411 llvm_unreachable("Unexpected runtime schedule");
2414 /// Map the OpenMP distribute schedule to the runtime enumeration.
2415 static OpenMPSchedType
2416 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2417 // Only 'static' is allowed for dist_schedule.
2418 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2421 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2422 bool Chunked) const {
2423 OpenMPSchedType Schedule =
2424 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2425 return Schedule == OMP_sch_static;
2428 bool CGOpenMPRuntime::isStaticNonchunked(
2429 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2430 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2431 return Schedule == OMP_dist_sch_static;
2434 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2435 bool Chunked) const {
2436 OpenMPSchedType Schedule =
2437 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2438 return Schedule == OMP_sch_static_chunked;
2441 bool CGOpenMPRuntime::isStaticChunked(
2442 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2443 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2444 return Schedule == OMP_dist_sch_static_chunked;
2447 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2448 OpenMPSchedType Schedule =
2449 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2450 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2451 return Schedule != OMP_sch_static;
2454 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2455 OpenMPScheduleClauseModifier M1,
2456 OpenMPScheduleClauseModifier M2) {
2457 int Modifier = 0;
2458 switch (M1) {
2459 case OMPC_SCHEDULE_MODIFIER_monotonic:
2460 Modifier = OMP_sch_modifier_monotonic;
2461 break;
2462 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2463 Modifier = OMP_sch_modifier_nonmonotonic;
2464 break;
2465 case OMPC_SCHEDULE_MODIFIER_simd:
2466 if (Schedule == OMP_sch_static_chunked)
2467 Schedule = OMP_sch_static_balanced_chunked;
2468 break;
2469 case OMPC_SCHEDULE_MODIFIER_last:
2470 case OMPC_SCHEDULE_MODIFIER_unknown:
2471 break;
2473 switch (M2) {
2474 case OMPC_SCHEDULE_MODIFIER_monotonic:
2475 Modifier = OMP_sch_modifier_monotonic;
2476 break;
2477 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2478 Modifier = OMP_sch_modifier_nonmonotonic;
2479 break;
2480 case OMPC_SCHEDULE_MODIFIER_simd:
2481 if (Schedule == OMP_sch_static_chunked)
2482 Schedule = OMP_sch_static_balanced_chunked;
2483 break;
2484 case OMPC_SCHEDULE_MODIFIER_last:
2485 case OMPC_SCHEDULE_MODIFIER_unknown:
2486 break;
2488 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2489 // If the static schedule kind is specified or if the ordered clause is
2490 // specified, and if the nonmonotonic modifier is not specified, the effect is
2491 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2492 // modifier is specified, the effect is as if the nonmonotonic modifier is
2493 // specified.
2494 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2495 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2496 Schedule == OMP_sch_static_balanced_chunked ||
2497 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2498 Schedule == OMP_dist_sch_static_chunked ||
2499 Schedule == OMP_dist_sch_static))
2500 Modifier = OMP_sch_modifier_nonmonotonic;
2502 return Schedule | Modifier;
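// Example of the encoding above (a sketch): for OpenMP >= 5.0,
//   #pragma omp for schedule(dynamic, 4)
// carries no explicit modifier and no static schedule, so the result is
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// i.e. the modifier bits are OR'ed into the schedule value that is later
// passed to the dispatch-init runtime entry.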
2505 void CGOpenMPRuntime::emitForDispatchInit(
2506 CodeGenFunction &CGF, SourceLocation Loc,
2507 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2508 bool Ordered, const DispatchRTInput &DispatchValues) {
2509 if (!CGF.HaveInsertPoint())
2510 return;
2511 OpenMPSchedType Schedule = getRuntimeSchedule(
2512 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2513 assert(Ordered ||
2514 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2515 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2516 Schedule != OMP_sch_static_balanced_chunked));
2517 // Call __kmpc_dispatch_init(
2518 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2519 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2520 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2522 // If the Chunk was not specified in the clause, use the default value 1.
2523 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2524 : CGF.Builder.getIntN(IVSize, 1);
2525 llvm::Value *Args[] = {
2526 emitUpdateLocation(CGF, Loc),
2527 getThreadID(CGF, Loc),
2528 CGF.Builder.getInt32(addMonoNonMonoModifier(
2529 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2530 DispatchValues.LB, // Lower
2531 DispatchValues.UB, // Upper
2532 CGF.Builder.getIntN(IVSize, 1), // Stride
2533 Chunk // Chunk
2535 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2536 Args);
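// For illustration, assuming a 32-bit signed induction variable,
//   #pragma omp for schedule(dynamic, 4)
// over i in [0, N) initializes dynamic scheduling roughly as
//   __kmpc_dispatch_init_4(&loc, gtid, <sched>, /*lb=*/0, /*ub=*/N-1,
//                          /*st=*/1, /*chunk=*/4);
// after which each thread fetches chunks via __kmpc_dispatch_next_4 (see
// emitForNext below) until it returns 0.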
2539 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2540 SourceLocation Loc) {
2541 if (!CGF.HaveInsertPoint())
2542 return;
2543 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2544 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2545 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2548 static void emitForStaticInitCall(
2549 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2550 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2551 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2552 const CGOpenMPRuntime::StaticRTInput &Values) {
2553 if (!CGF.HaveInsertPoint())
2554 return;
2556 assert(!Values.Ordered);
2557 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2558 Schedule == OMP_sch_static_balanced_chunked ||
2559 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2560 Schedule == OMP_dist_sch_static ||
2561 Schedule == OMP_dist_sch_static_chunked);
2563 // Call __kmpc_for_static_init(
2564 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2565 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2566 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2567 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2568 llvm::Value *Chunk = Values.Chunk;
2569 if (Chunk == nullptr) {
2570 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2571 Schedule == OMP_dist_sch_static) &&
2572 "expected static non-chunked schedule");
2573 // If the Chunk was not specified in the clause, use the default value 1.
2574 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2575 } else {
2576 assert((Schedule == OMP_sch_static_chunked ||
2577 Schedule == OMP_sch_static_balanced_chunked ||
2578 Schedule == OMP_ord_static_chunked ||
2579 Schedule == OMP_dist_sch_static_chunked) &&
2580 "expected static chunked schedule");
2582 llvm::Value *Args[] = {
2583 UpdateLocation,
2584 ThreadId,
2585 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2586 M2)), // Schedule type
2587 Values.IL.emitRawPointer(CGF), // &isLastIter
2588 Values.LB.emitRawPointer(CGF), // &LB
2589 Values.UB.emitRawPointer(CGF), // &UB
2590 Values.ST.emitRawPointer(CGF), // &Stride
2591 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2592 Chunk // Chunk
2594 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2597 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2598 SourceLocation Loc,
2599 OpenMPDirectiveKind DKind,
2600 const OpenMPScheduleTy &ScheduleKind,
2601 const StaticRTInput &Values) {
2602 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2603 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2604 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2605 "Expected loop-based or sections-based directive.");
2606 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2607 isOpenMPLoopDirective(DKind)
2608 ? OMP_IDENT_WORK_LOOP
2609 : OMP_IDENT_WORK_SECTIONS);
2610 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2611 llvm::FunctionCallee StaticInitFunction =
2612 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2613 false);
2614 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2615 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2616 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
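// Sketch of the static case, assuming a 32-bit signed induction variable:
//   #pragma omp for schedule(static)
// emits a single call, roughly
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &last, &lb, &ub,
//                            &stride, /*incr=*/1, /*chunk=*/1);
// the runtime fills lb/ub with this thread's contiguous iteration range, the
// body runs over it, and __kmpc_for_static_fini is called afterwards (see
// emitForStaticFinish below).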
2619 void CGOpenMPRuntime::emitDistributeStaticInit(
2620 CodeGenFunction &CGF, SourceLocation Loc,
2621 OpenMPDistScheduleClauseKind SchedKind,
2622 const CGOpenMPRuntime::StaticRTInput &Values) {
2623 OpenMPSchedType ScheduleNum =
2624 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2625 llvm::Value *UpdatedLocation =
2626 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2627 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2628 llvm::FunctionCallee StaticInitFunction;
2629 bool isGPUDistribute =
2630 CGM.getLangOpts().OpenMPIsTargetDevice &&
2631 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2632 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2633 Values.IVSize, Values.IVSigned, isGPUDistribute);
2635 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2636 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2637 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2640 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2641 SourceLocation Loc,
2642 OpenMPDirectiveKind DKind) {
2643 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2644 DKind == OMPD_sections) &&
2645 "Expected distribute, for, or sections directive kind");
2646 if (!CGF.HaveInsertPoint())
2647 return;
2648 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2649 llvm::Value *Args[] = {
2650 emitUpdateLocation(CGF, Loc,
2651 isOpenMPDistributeDirective(DKind) ||
2652 (DKind == OMPD_target_teams_loop)
2653 ? OMP_IDENT_WORK_DISTRIBUTE
2654 : isOpenMPLoopDirective(DKind)
2655 ? OMP_IDENT_WORK_LOOP
2656 : OMP_IDENT_WORK_SECTIONS),
2657 getThreadID(CGF, Loc)};
2658 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2659 if (isOpenMPDistributeDirective(DKind) &&
2660 CGM.getLangOpts().OpenMPIsTargetDevice &&
2661 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2662 CGF.EmitRuntimeCall(
2663 OMPBuilder.getOrCreateRuntimeFunction(
2664 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2665 Args);
2666 else
2667 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2668 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2669 Args);
2672 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2673 SourceLocation Loc,
2674 unsigned IVSize,
2675 bool IVSigned) {
2676 if (!CGF.HaveInsertPoint())
2677 return;
2678 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2679 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2680 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2681 Args);
2684 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2685 SourceLocation Loc, unsigned IVSize,
2686 bool IVSigned, Address IL,
2687 Address LB, Address UB,
2688 Address ST) {
2689 // Call __kmpc_dispatch_next(
2690 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2691 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2692 // kmp_int[32|64] *p_stride);
2693 llvm::Value *Args[] = {
2694 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2695 IL.emitRawPointer(CGF), // &isLastIter
2696 LB.emitRawPointer(CGF), // &Lower
2697 UB.emitRawPointer(CGF), // &Upper
2698 ST.emitRawPointer(CGF) // &Stride
2700 llvm::Value *Call = CGF.EmitRuntimeCall(
2701 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2702 return CGF.EmitScalarConversion(
2703 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2704 CGF.getContext().BoolTy, Loc);
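// Usage sketch: the generated dispatch loop repeatedly calls, roughly,
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st))
//     for (i = lb; i <= ub; i += st) body(i);
// the boolean conversion above feeds the loop's controlling branch.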
2707 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2708 llvm::Value *NumThreads,
2709 SourceLocation Loc) {
2710 if (!CGF.HaveInsertPoint())
2711 return;
2712 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2713 llvm::Value *Args[] = {
2714 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2715 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2716 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2717 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2718 Args);
2721 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2722 ProcBindKind ProcBind,
2723 SourceLocation Loc) {
2724 if (!CGF.HaveInsertPoint())
2725 return;
2726 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2727 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2728 llvm::Value *Args[] = {
2729 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2730 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2731 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2732 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2733 Args);
2736 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2737 SourceLocation Loc, llvm::AtomicOrdering AO) {
2738 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2739 OMPBuilder.createFlush(CGF.Builder);
2740 } else {
2741 if (!CGF.HaveInsertPoint())
2742 return;
2743 // Build call void __kmpc_flush(ident_t *loc)
2744 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2745 CGM.getModule(), OMPRTL___kmpc_flush),
2746 emitUpdateLocation(CGF, Loc));
2750 namespace {
2751 /// Indexes of fields for type kmp_task_t.
2752 enum KmpTaskTFields {
2753 /// List of shared variables.
2754 KmpTaskTShareds,
2755 /// Task routine.
2756 KmpTaskTRoutine,
2757 /// Partition id for the untied tasks.
2758 KmpTaskTPartId,
2759 /// Function with call of destructors for private variables.
2760 Data1,
2761 /// Task priority.
2762 Data2,
2763 /// (Taskloops only) Lower bound.
2764 KmpTaskTLowerBound,
2765 /// (Taskloops only) Upper bound.
2766 KmpTaskTUpperBound,
2767 /// (Taskloops only) Stride.
2768 KmpTaskTStride,
2769 /// (Taskloops only) Is last iteration flag.
2770 KmpTaskTLastIter,
2771 /// (Taskloops only) Reduction data.
2772 KmpTaskTReductions,
2774 } // anonymous namespace
2776 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2777 // If we are in simd mode or there are no entries, we don't need to do
2778 // anything.
2779 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2780 return;
2782 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2783 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2784 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2785 SourceLocation Loc;
2786 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2787 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2788 E = CGM.getContext().getSourceManager().fileinfo_end();
2789 I != E; ++I) {
2790 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2791 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2792 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2793 I->getFirst(), EntryInfo.Line, 1);
2794 break;
2798 switch (Kind) {
2799 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2800 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2801 DiagnosticsEngine::Error, "Offloading entry for target region in "
2802 "%0 is incorrect: either the "
2803 "address or the ID is invalid.");
2804 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2805 } break;
2806 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2807 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2808 DiagnosticsEngine::Error, "Offloading entry for declare target "
2809 "variable %0 is incorrect: the "
2810 "address is invalid.");
2811 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2812 } break;
2813 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2814 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2815 DiagnosticsEngine::Error,
2816 "Offloading entry for declare target variable is incorrect: the "
2817 "address is invalid.");
2818 CGM.getDiags().Report(DiagID);
2819 } break;
2823 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2826 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2827 if (!KmpRoutineEntryPtrTy) {
2828 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2829 ASTContext &C = CGM.getContext();
2830 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2831 FunctionProtoType::ExtProtoInfo EPI;
2832 KmpRoutineEntryPtrQTy = C.getPointerType(
2833 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2834 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2838 namespace {
2839 struct PrivateHelpersTy {
2840 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2841 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2842 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2843 PrivateElemInit(PrivateElemInit) {}
2844 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2845 const Expr *OriginalRef = nullptr;
2846 const VarDecl *Original = nullptr;
2847 const VarDecl *PrivateCopy = nullptr;
2848 const VarDecl *PrivateElemInit = nullptr;
2849 bool isLocalPrivate() const {
2850 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2853 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2854 } // anonymous namespace
2856 static bool isAllocatableDecl(const VarDecl *VD) {
2857 const VarDecl *CVD = VD->getCanonicalDecl();
2858 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2859 return false;
2860 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2861 // Use the default allocation.
2862 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2863 !AA->getAllocator());
2864 }
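// Example: a declaration such as
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// is allocatable in this sense, whereas a plain declaration, or one naming the
// default allocator without an allocator expression, is not.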
2866 static RecordDecl *
2867 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2868 if (!Privates.empty()) {
2869 ASTContext &C = CGM.getContext();
2870 // Build struct .kmp_privates.t {
2871 // /* private vars */
2872 // };
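// For example, `#pragma omp task firstprivate(d, i)` with `double d; int i;`
// yields roughly:
//   struct .kmp_privates.t { double d; int i; };
// (the caller pre-sorts the privates by decreasing alignment, so the field
// order follows that sort, not the clause order).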
2873 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2874 RD->startDefinition();
2875 for (const auto &Pair : Privates) {
2876 const VarDecl *VD = Pair.second.Original;
2877 QualType Type = VD->getType().getNonReferenceType();
2878 // If the private variable is a local variable with lvalue ref type,
2879 // allocate the pointer instead of the pointee type.
2880 if (Pair.second.isLocalPrivate()) {
2881 if (VD->getType()->isLValueReferenceType())
2882 Type = C.getPointerType(Type);
2883 if (isAllocatableDecl(VD))
2884 Type = C.getPointerType(Type);
2885 }
2886 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2887 if (VD->hasAttrs()) {
2888 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2889 E(VD->getAttrs().end());
2890 I != E; ++I)
2891 FD->addAttr(*I);
2892 }
2893 }
2894 RD->completeDefinition();
2895 return RD;
2896 }
2897 return nullptr;
2898 }
2900 static RecordDecl *
2901 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2902 QualType KmpInt32Ty,
2903 QualType KmpRoutineEntryPointerQTy) {
2904 ASTContext &C = CGM.getContext();
2905 // Build struct kmp_task_t {
2906 // void * shareds;
2907 // kmp_routine_entry_t routine;
2908 // kmp_int32 part_id;
2909 // kmp_cmplrdata_t data1;
2910 // kmp_cmplrdata_t data2;
2911 // For taskloops additional fields:
2912 // kmp_uint64 lb;
2913 // kmp_uint64 ub;
2914 // kmp_int64 st;
2915 // kmp_int32 liter;
2916 // void * reductions;
2917 // };
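// Note: kmp_cmplrdata_t is a union; data1 holds the destructor thunk when the
// task needs cleanups, and data2 holds the priority value (see the
// Destructors/Priority handling near the end of emitTaskInit).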
2918 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2919 UD->startDefinition();
2920 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2921 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2922 UD->completeDefinition();
2923 QualType KmpCmplrdataTy = C.getRecordType(UD);
2924 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2925 RD->startDefinition();
2926 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2927 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2928 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2929 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2930 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2931 if (isOpenMPTaskLoopDirective(Kind)) {
2932 QualType KmpUInt64Ty =
2933 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2934 QualType KmpInt64Ty =
2935 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2936 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2937 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2938 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2939 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2940 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2941 }
2942 RD->completeDefinition();
2943 return RD;
2944 }
2946 static RecordDecl *
2947 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2948 ArrayRef<PrivateDataTy> Privates) {
2949 ASTContext &C = CGM.getContext();
2950 // Build struct kmp_task_t_with_privates {
2951 // kmp_task_t task_data;
2952 // .kmp_privates_t. privates;
2953 // };
2954 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2955 RD->startDefinition();
2956 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2957 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2958 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2959 RD->completeDefinition();
2960 return RD;
2961 }
2963 /// Emit a proxy function which accepts kmp_task_t as the second
2964 /// argument.
2965 /// \code
2966 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2967 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2968 /// For taskloops:
2969 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2970 /// tt->reductions, tt->shareds);
2971 /// return 0;
2972 /// }
2973 /// \endcode
2974 static llvm::Function *
2975 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2976 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2977 QualType KmpTaskTWithPrivatesPtrQTy,
2978 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2979 QualType SharedsPtrTy, llvm::Function *TaskFunction,
2980 llvm::Value *TaskPrivatesMap) {
2981 ASTContext &C = CGM.getContext();
2982 FunctionArgList Args;
2983 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2984 ImplicitParamKind::Other);
2985 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2986 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2987 ImplicitParamKind::Other);
2988 Args.push_back(&GtidArg);
2989 Args.push_back(&TaskTypeArg);
2990 const auto &TaskEntryFnInfo =
2991 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
2992 llvm::FunctionType *TaskEntryTy =
2993 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2994 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
2995 auto *TaskEntry = llvm::Function::Create(
2996 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
2997 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
2998 TaskEntry->setDoesNotRecurse();
2999 CodeGenFunction CGF(CGM);
3000 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3001 Loc, Loc);
3003 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3004 // tt,
3005 // For taskloops:
3006 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3007 // tt->task_data.shareds);
3008 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3009 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3010 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3011 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3012 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3013 const auto *KmpTaskTWithPrivatesQTyRD =
3014 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3015 LValue Base =
3016 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3017 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3018 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3019 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3020 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3022 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3023 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3024 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3025 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3026 CGF.ConvertTypeForMem(SharedsPtrTy));
3028 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3029 llvm::Value *PrivatesParam;
3030 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3031 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3032 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3033 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3034 } else {
3035 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3036 }
3038 llvm::Value *CommonArgs[] = {
3039 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3040 CGF.Builder
3041 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3042 CGF.VoidPtrTy, CGF.Int8Ty)
3043 .emitRawPointer(CGF)};
3044 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3045 std::end(CommonArgs));
3046 if (isOpenMPTaskLoopDirective(Kind)) {
3047 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3048 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3049 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3050 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3051 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3052 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3053 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3054 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3055 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3056 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3057 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3058 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3059 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3060 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3061 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3062 CallArgs.push_back(LBParam);
3063 CallArgs.push_back(UBParam);
3064 CallArgs.push_back(StParam);
3065 CallArgs.push_back(LIParam);
3066 CallArgs.push_back(RParam);
3067 }
3068 CallArgs.push_back(SharedsParam);
3070 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3071 CallArgs);
3072 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3073 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3074 CGF.FinishFunction();
3075 return TaskEntry;
3076 }
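/// Emit a function that runs the destructors of the task's privates block; the
/// runtime invokes it for tasks allocated with DestructorsFlag set. A rough
/// sketch of what is generated:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   // destroy each field of tt->privates with a non-trivial destructor
/// }
/// \endcode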
3078 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3079 SourceLocation Loc,
3080 QualType KmpInt32Ty,
3081 QualType KmpTaskTWithPrivatesPtrQTy,
3082 QualType KmpTaskTWithPrivatesQTy) {
3083 ASTContext &C = CGM.getContext();
3084 FunctionArgList Args;
3085 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3086 ImplicitParamKind::Other);
3087 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3088 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3089 ImplicitParamKind::Other);
3090 Args.push_back(&GtidArg);
3091 Args.push_back(&TaskTypeArg);
3092 const auto &DestructorFnInfo =
3093 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3094 llvm::FunctionType *DestructorFnTy =
3095 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3096 std::string Name =
3097 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3098 auto *DestructorFn =
3099 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3100 Name, &CGM.getModule());
3101 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3102 DestructorFnInfo);
3103 DestructorFn->setDoesNotRecurse();
3104 CodeGenFunction CGF(CGM);
3105 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3106 Args, Loc, Loc);
3108 LValue Base = CGF.EmitLoadOfPointerLValue(
3109 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3110 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3111 const auto *KmpTaskTWithPrivatesQTyRD =
3112 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3113 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3114 Base = CGF.EmitLValueForField(Base, *FI);
3115 for (const auto *Field :
3116 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3117 if (QualType::DestructionKind DtorKind =
3118 Field->getType().isDestructedType()) {
3119 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3120 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3121 }
3122 }
3123 CGF.FinishFunction();
3124 return DestructorFn;
3125 }
3127 /// Emit a privates mapping function for correct handling of private and
3128 /// firstprivate variables.
3129 /// \code
3130 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3131 /// **noalias priv1,..., <tyn> **noalias privn) {
3132 /// *priv1 = &.privates.priv1;
3133 /// ...;
3134 /// *privn = &.privates.privn;
3135 /// }
3136 /// \endcode
3137 static llvm::Value *
3138 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3139 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3140 ArrayRef<PrivateDataTy> Privates) {
3141 ASTContext &C = CGM.getContext();
3142 FunctionArgList Args;
3143 ImplicitParamDecl TaskPrivatesArg(
3144 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3145 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3146 ImplicitParamKind::Other);
3147 Args.push_back(&TaskPrivatesArg);
3148 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3149 unsigned Counter = 1;
3150 for (const Expr *E : Data.PrivateVars) {
3151 Args.push_back(ImplicitParamDecl::Create(
3152 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3153 C.getPointerType(C.getPointerType(E->getType()))
3154 .withConst()
3155 .withRestrict(),
3156 ImplicitParamKind::Other));
3157 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3158 PrivateVarsPos[VD] = Counter;
3159 ++Counter;
3160 }
3161 for (const Expr *E : Data.FirstprivateVars) {
3162 Args.push_back(ImplicitParamDecl::Create(
3163 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3164 C.getPointerType(C.getPointerType(E->getType()))
3165 .withConst()
3166 .withRestrict(),
3167 ImplicitParamKind::Other));
3168 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3169 PrivateVarsPos[VD] = Counter;
3170 ++Counter;
3171 }
3172 for (const Expr *E : Data.LastprivateVars) {
3173 Args.push_back(ImplicitParamDecl::Create(
3174 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3175 C.getPointerType(C.getPointerType(E->getType()))
3176 .withConst()
3177 .withRestrict(),
3178 ImplicitParamKind::Other));
3179 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3180 PrivateVarsPos[VD] = Counter;
3181 ++Counter;
3182 }
3183 for (const VarDecl *VD : Data.PrivateLocals) {
3184 QualType Ty = VD->getType().getNonReferenceType();
3185 if (VD->getType()->isLValueReferenceType())
3186 Ty = C.getPointerType(Ty);
3187 if (isAllocatableDecl(VD))
3188 Ty = C.getPointerType(Ty);
3189 Args.push_back(ImplicitParamDecl::Create(
3190 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3191 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3192 ImplicitParamKind::Other));
3193 PrivateVarsPos[VD] = Counter;
3194 ++Counter;
3195 }
3196 const auto &TaskPrivatesMapFnInfo =
3197 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3198 llvm::FunctionType *TaskPrivatesMapTy =
3199 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3200 std::string Name =
3201 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3202 auto *TaskPrivatesMap = llvm::Function::Create(
3203 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3204 &CGM.getModule());
3205 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3206 TaskPrivatesMapFnInfo);
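// The mapping function is trivial glue; when optimizing, strip the O0
// noinline/optnone attributes and mark it always_inline so the extra
// indirection can disappear at its call sites.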
3207 if (CGM.getLangOpts().Optimize) {
3208 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3209 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3210 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3211 }
3212 CodeGenFunction CGF(CGM);
3213 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3214 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3216 // *privi = &.privates.privi;
3217 LValue Base = CGF.EmitLoadOfPointerLValue(
3218 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3219 TaskPrivatesArg.getType()->castAs<PointerType>());
3220 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3221 Counter = 0;
3222 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3223 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3224 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3225 LValue RefLVal =
3226 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3227 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3228 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3229 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3230 ++Counter;
3231 }
3232 CGF.FinishFunction();
3233 return TaskPrivatesMap;
3234 }
3236 /// Emit initialization for private variables in task-based directives.
3237 static void emitPrivatesInit(CodeGenFunction &CGF,
3238 const OMPExecutableDirective &D,
3239 Address KmpTaskSharedsPtr, LValue TDBase,
3240 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3241 QualType SharedsTy, QualType SharedsPtrTy,
3242 const OMPTaskDataTy &Data,
3243 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3244 ASTContext &C = CGF.getContext();
3245 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3246 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3247 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3248 ? OMPD_taskloop
3249 : OMPD_task;
3250 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3251 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3252 LValue SrcBase;
3253 bool IsTargetTask =
3254 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3255 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3256 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3257 // PointersArray, SizesArray, and MappersArray. The original variables for
3258 // these arrays are not captured and we get their addresses explicitly.
3259 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3260 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3261 SrcBase = CGF.MakeAddrLValue(
3262 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3263 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3264 CGF.ConvertTypeForMem(SharedsTy)),
3265 SharedsTy);
3266 }
3267 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3268 for (const PrivateDataTy &Pair : Privates) {
3269 // Do not initialize private locals.
3270 if (Pair.second.isLocalPrivate()) {
3271 ++FI;
3272 continue;
3273 }
3274 const VarDecl *VD = Pair.second.PrivateCopy;
3275 const Expr *Init = VD->getAnyInitializer();
3276 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3277 !CGF.isTrivialInitializer(Init)))) {
3278 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3279 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3280 const VarDecl *OriginalVD = Pair.second.Original;
3281 // Check if the variable is the target-based BasePointersArray,
3282 // PointersArray, SizesArray, or MappersArray.
3283 LValue SharedRefLValue;
3284 QualType Type = PrivateLValue.getType();
3285 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3286 if (IsTargetTask && !SharedField) {
3287 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3288 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3289 cast<CapturedDecl>(OriginalVD->getDeclContext())
3290 ->getNumParams() == 0 &&
3291 isa<TranslationUnitDecl>(
3292 cast<CapturedDecl>(OriginalVD->getDeclContext())
3293 ->getDeclContext()) &&
3294 "Expected artificial target data variable.");
3295 SharedRefLValue =
3296 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3297 } else if (ForDup) {
3298 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3299 SharedRefLValue = CGF.MakeAddrLValue(
3300 SharedRefLValue.getAddress().withAlignment(
3301 C.getDeclAlign(OriginalVD)),
3302 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3303 SharedRefLValue.getTBAAInfo());
3304 } else if (CGF.LambdaCaptureFields.count(
3305 Pair.second.Original->getCanonicalDecl()) > 0 ||
3306 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3307 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3308 } else {
3309 // Processing for implicitly captured variables.
3310 InlinedOpenMPRegionRAII Region(
3311 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3312 /*HasCancel=*/false, /*NoInheritance=*/true);
3313 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3314 }
3315 if (Type->isArrayType()) {
3316 // Initialize firstprivate array.
3317 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3318 // Perform simple memcpy.
3319 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3320 } else {
3321 // Initialize firstprivate array using element-by-element
3322 // initialization.
3323 CGF.EmitOMPAggregateAssign(
3324 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3325 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3326 Address SrcElement) {
3327 // Clean up any temporaries needed by the initialization.
3328 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3329 InitScope.addPrivate(Elem, SrcElement);
3330 (void)InitScope.Privatize();
3331 // Emit initialization for single element.
3332 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3333 CGF, &CapturesInfo);
3334 CGF.EmitAnyExprToMem(Init, DestElement,
3335 Init->getType().getQualifiers(),
3336 /*IsInitializer=*/false);
3337 });
3338 }
3339 } else {
3340 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3341 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3342 (void)InitScope.Privatize();
3343 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3344 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3345 /*capturedByInit=*/false);
3346 }
3347 } else {
3348 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3349 }
3350 }
3351 ++FI;
3352 }
3353 }
3355 /// Check if duplication function is required for taskloops.
3356 static bool checkInitIsRequired(CodeGenFunction &CGF,
3357 ArrayRef<PrivateDataTy> Privates) {
3358 bool InitRequired = false;
3359 for (const PrivateDataTy &Pair : Privates) {
3360 if (Pair.second.isLocalPrivate())
3361 continue;
3362 const VarDecl *VD = Pair.second.PrivateCopy;
3363 const Expr *Init = VD->getAnyInitializer();
3364 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3365 !CGF.isTrivialInitializer(Init));
3366 if (InitRequired)
3367 break;
3368 }
3369 return InitRequired;
3370 }
3373 /// Emit task_dup function (for initialization of
3374 /// private/firstprivate/lastprivate vars and last_iter flag)
3375 /// \code
3376 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3377 /// lastpriv) {
3378 /// // setup lastprivate flag
3379 /// task_dst->last = lastpriv;
3380 /// // could be constructor calls here...
3381 /// }
3382 /// \endcode
3383 static llvm::Value *
3384 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3385 const OMPExecutableDirective &D,
3386 QualType KmpTaskTWithPrivatesPtrQTy,
3387 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3388 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3389 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3390 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3391 ASTContext &C = CGM.getContext();
3392 FunctionArgList Args;
3393 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3394 KmpTaskTWithPrivatesPtrQTy,
3395 ImplicitParamKind::Other);
3396 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3397 KmpTaskTWithPrivatesPtrQTy,
3398 ImplicitParamKind::Other);
3399 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3400 ImplicitParamKind::Other);
3401 Args.push_back(&DstArg);
3402 Args.push_back(&SrcArg);
3403 Args.push_back(&LastprivArg);
3404 const auto &TaskDupFnInfo =
3405 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3406 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3407 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3408 auto *TaskDup = llvm::Function::Create(
3409 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3410 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3411 TaskDup->setDoesNotRecurse();
3412 CodeGenFunction CGF(CGM);
3413 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3414 Loc);
3416 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3417 CGF.GetAddrOfLocalVar(&DstArg),
3418 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3419 // task_dst->liter = lastpriv;
3420 if (WithLastIter) {
3421 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3422 LValue Base = CGF.EmitLValueForField(
3423 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3424 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3425 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3426 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3427 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3428 }
3430 // Emit initial values for private copies (if any).
3431 assert(!Privates.empty());
3432 Address KmpTaskSharedsPtr = Address::invalid();
3433 if (!Data.FirstprivateVars.empty()) {
3434 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3435 CGF.GetAddrOfLocalVar(&SrcArg),
3436 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3437 LValue Base = CGF.EmitLValueForField(
3438 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3439 KmpTaskSharedsPtr = Address(
3440 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3441 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3442 KmpTaskTShareds)),
3443 Loc),
3444 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3445 }
3446 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3447 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3448 CGF.FinishFunction();
3449 return TaskDup;
3450 }
3452 /// Checks if destructor function is required to be generated.
3453 /// \return true if cleanups are required, false otherwise.
3454 static bool
3455 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3456 ArrayRef<PrivateDataTy> Privates) {
3457 for (const PrivateDataTy &P : Privates) {
3458 if (P.second.isLocalPrivate())
3459 continue;
3460 QualType Ty = P.second.Original->getType().getNonReferenceType();
3461 if (Ty.isDestructedType())
3462 return true;
3463 }
3464 return false;
3465 }
3467 namespace {
3468 /// Loop generator for OpenMP iterator expression.
3469 class OMPIteratorGeneratorScope final
3470 : public CodeGenFunction::OMPPrivateScope {
3471 CodeGenFunction &CGF;
3472 const OMPIteratorExpr *E = nullptr;
3473 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3474 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3475 OMPIteratorGeneratorScope() = delete;
3476 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3478 public:
3479 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3480 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3481 if (!E)
3482 return;
3483 SmallVector<llvm::Value *, 4> Uppers;
3484 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3485 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3486 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3487 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3488 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3489 addPrivate(
3490 HelperData.CounterVD,
3491 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3492 }
3493 Privatize();
3495 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3496 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3497 LValue CLVal =
3498 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3499 HelperData.CounterVD->getType());
3500 // Counter = 0;
3501 CGF.EmitStoreOfScalar(
3502 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3503 CLVal);
3504 CodeGenFunction::JumpDest &ContDest =
3505 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3506 CodeGenFunction::JumpDest &ExitDest =
3507 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3508 // N = <number-of-iterations>;
3509 llvm::Value *N = Uppers[I];
3510 // cont:
3511 // if (Counter < N) goto body; else goto exit;
3512 CGF.EmitBlock(ContDest.getBlock());
3513 auto *CVal =
3514 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3515 llvm::Value *Cmp =
3516 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3517 ? CGF.Builder.CreateICmpSLT(CVal, N)
3518 : CGF.Builder.CreateICmpULT(CVal, N);
3519 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3520 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3521 // body:
3522 CGF.EmitBlock(BodyBB);
3523 // Iteri = Begini + Counter * Stepi;
3524 CGF.EmitIgnoredExpr(HelperData.Update);
3525 }
3526 }
3527 ~OMPIteratorGeneratorScope() {
3528 if (!E)
3529 return;
3530 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3531 // Counter = Counter + 1;
3532 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3533 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3534 // goto cont;
3535 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3536 // exit:
3537 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3538 }
3539 }
3540 };
3541 } // namespace
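// For a modifier such as `iterator(i = 0 : n)`, the scope above brackets the
// code its user emits in a loop shaped roughly like:
//   counter = 0;
// iter.cont:
//   if (!(counter < n)) goto iter.exit;
//   i = begin + counter * step;
//   ... user-emitted body (e.g. filling one dependence/affinity slot) ...
//   counter = counter + 1; goto iter.cont;
// iter.exit: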
3543 static std::pair<llvm::Value *, llvm::Value *>
3544 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3545 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3546 llvm::Value *Addr;
3547 if (OASE) {
3548 const Expr *Base = OASE->getBase();
3549 Addr = CGF.EmitScalarExpr(Base);
3550 } else {
3551 Addr = CGF.EmitLValue(E).getPointer(CGF);
3552 }
3553 llvm::Value *SizeVal;
3554 QualType Ty = E->getType();
3555 if (OASE) {
3556 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3557 for (const Expr *SE : OASE->getDimensions()) {
3558 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3559 Sz = CGF.EmitScalarConversion(
3560 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3561 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3562 }
3563 } else if (const auto *ASE =
3564 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3565 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3566 Address UpAddrAddress = UpAddrLVal.getAddress();
3567 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3568 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3569 /*Idx0=*/1);
3570 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3571 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3572 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3573 } else {
3574 SizeVal = CGF.getTypeSize(Ty);
3575 }
3576 return std::make_pair(Addr, SizeVal);
3577 }
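// Examples of the size computed above, in bytes:
//   array shaping ([n][m])p -> sizeof(*p) * n * m
//   array section a[lb:len] -> one past the section's last element minus &a[lb]
//   any other expression e  -> sizeof(e)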
3580 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
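/// A sketch of the record, mirroring the runtime's kmp_task_affinity_info_t
/// (whose flags member is a bit-field struct in kmp.h; it is modeled here as a
/// plain 32-bit unsigned integer):
/// \code
/// struct kmp_task_affinity_info_t {
///   intptr_t base_addr;
///   size_t len;
///   uint32_t flags;
/// };
/// \endcode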
3580 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3581 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3582 if (KmpTaskAffinityInfoTy.isNull()) {
3583 RecordDecl *KmpAffinityInfoRD =
3584 C.buildImplicitRecord("kmp_task_affinity_info_t");
3585 KmpAffinityInfoRD->startDefinition();
3586 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3587 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3588 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3589 KmpAffinityInfoRD->completeDefinition();
3590 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3591 }
3592 }
3594 CGOpenMPRuntime::TaskResultTy
3595 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3596 const OMPExecutableDirective &D,
3597 llvm::Function *TaskFunction, QualType SharedsTy,
3598 Address Shareds, const OMPTaskDataTy &Data) {
3599 ASTContext &C = CGM.getContext();
3600 llvm::SmallVector<PrivateDataTy, 4> Privates;
3601 // Aggregate privates and sort them by alignment (descending).
3602 const auto *I = Data.PrivateCopies.begin();
3603 for (const Expr *E : Data.PrivateVars) {
3604 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3605 Privates.emplace_back(
3606 C.getDeclAlign(VD),
3607 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3608 /*PrivateElemInit=*/nullptr));
3609 ++I;
3610 }
3611 I = Data.FirstprivateCopies.begin();
3612 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3613 for (const Expr *E : Data.FirstprivateVars) {
3614 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3615 Privates.emplace_back(
3616 C.getDeclAlign(VD),
3617 PrivateHelpersTy(
3618 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3619 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3620 ++I;
3621 ++IElemInitRef;
3622 }
3623 I = Data.LastprivateCopies.begin();
3624 for (const Expr *E : Data.LastprivateVars) {
3625 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3626 Privates.emplace_back(
3627 C.getDeclAlign(VD),
3628 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3629 /*PrivateElemInit=*/nullptr));
3630 ++I;
3631 }
3632 for (const VarDecl *VD : Data.PrivateLocals) {
3633 if (isAllocatableDecl(VD))
3634 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3635 else
3636 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3637 }
3638 llvm::stable_sort(Privates,
3639 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3640 return L.first > R.first;
3641 });
3642 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3643 // Build type kmp_routine_entry_t (if not built yet).
3644 emitKmpRoutineEntryT(KmpInt32Ty);
3645 // Build type kmp_task_t (if not built yet).
3646 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3647 if (SavedKmpTaskloopTQTy.isNull()) {
3648 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3649 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3650 }
3651 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3652 } else {
3653 assert((D.getDirectiveKind() == OMPD_task ||
3654 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3655 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3656 "Expected taskloop, task or target directive");
3657 if (SavedKmpTaskTQTy.isNull()) {
3658 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3659 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3660 }
3661 KmpTaskTQTy = SavedKmpTaskTQTy;
3662 }
3663 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3664 // Build particular struct kmp_task_t for the given task.
3665 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3666 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3667 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3668 QualType KmpTaskTWithPrivatesPtrQTy =
3669 C.getPointerType(KmpTaskTWithPrivatesQTy);
3670 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3671 llvm::Value *KmpTaskTWithPrivatesTySize =
3672 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3673 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3675 // Build the privates mapping function (if there are any privates).
3676 llvm::Value *TaskPrivatesMap = nullptr;
3677 llvm::Type *TaskPrivatesMapTy =
3678 std::next(TaskFunction->arg_begin(), 3)->getType();
3679 if (!Privates.empty()) {
3680 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3681 TaskPrivatesMap =
3682 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3683 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3684 TaskPrivatesMap, TaskPrivatesMapTy);
3685 } else {
3686 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3687 cast<llvm::PointerType>(TaskPrivatesMapTy));
3688 }
3689 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3690 // kmp_task_t *tt);
3691 llvm::Function *TaskEntry = emitProxyTaskFunction(
3692 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3693 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3694 TaskPrivatesMap);
3696 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3697 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3698 // kmp_routine_entry_t *task_entry);
3699 // Task flags. Format is taken from
3700 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3701 // description of kmp_tasking_flags struct.
3702 enum {
3703 TiedFlag = 0x1,
3704 FinalFlag = 0x2,
3705 DestructorsFlag = 0x8,
3706 PriorityFlag = 0x20,
3707 DetachableFlag = 0x40,
3708 };
3709 unsigned Flags = Data.Tied ? TiedFlag : 0;
3710 bool NeedsCleanup = false;
3711 if (!Privates.empty()) {
3712 NeedsCleanup =
3713 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3714 if (NeedsCleanup)
3715 Flags = Flags | DestructorsFlag;
3716 }
3717 if (Data.Priority.getInt())
3718 Flags = Flags | PriorityFlag;
3719 if (D.hasClausesOfKind<OMPDetachClause>())
3720 Flags = Flags | DetachableFlag;
3721 llvm::Value *TaskFlags =
3722 Data.Final.getPointer()
3723 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3724 CGF.Builder.getInt32(FinalFlag),
3725 CGF.Builder.getInt32(/*C=*/0))
3726 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3727 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
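// Worked example: a tied task with a priority clause whose privates need
// cleanup gets TiedFlag | PriorityFlag | DestructorsFlag = 0x1 | 0x20 | 0x8 =
// 0x29 (FinalFlag is OR'ed in dynamically above when final(...) is true).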
3728 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3729 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3730 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3731 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3732 TaskEntry, KmpRoutineEntryPtrTy)};
3733 llvm::Value *NewTask;
3734 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3735 // Check if we have any device clause associated with the directive.
3736 const Expr *Device = nullptr;
3737 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3738 Device = C->getDevice();
3739 // Emit the device ID if any, otherwise use the default value.
3740 llvm::Value *DeviceID;
3741 if (Device)
3742 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3743 CGF.Int64Ty, /*isSigned=*/true);
3744 else
3745 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3746 AllocArgs.push_back(DeviceID);
3747 NewTask = CGF.EmitRuntimeCall(
3748 OMPBuilder.getOrCreateRuntimeFunction(
3749 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3750 AllocArgs);
3751 } else {
3752 NewTask =
3753 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3754 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3755 AllocArgs);
3756 }
3757 // Emit detach clause initialization.
3758 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3759 // task_descriptor);
3760 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3761 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3762 LValue EvtLVal = CGF.EmitLValue(Evt);
3764 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3765 // int gtid, kmp_task_t *task);
3766 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3767 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3768 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3769 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3770 OMPBuilder.getOrCreateRuntimeFunction(
3771 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3772 {Loc, Tid, NewTask});
3773 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3774 Evt->getExprLoc());
3775 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3776 }
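// Source-level shape this lowers (OpenMP 5.0 detachable tasks):
//   omp_event_handle_t evt;
//   #pragma omp task detach(evt)
//   { ... }                      // completes only once evt is fulfilled
//   omp_fulfill_event(evt);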
3777 // Process affinity clauses.
3778 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3779 // Process list of affinity data.
3780 ASTContext &C = CGM.getContext();
3781 Address AffinitiesArray = Address::invalid();
3782 // Calculate number of elements to form the array of affinity data.
3783 llvm::Value *NumOfElements = nullptr;
3784 unsigned NumAffinities = 0;
3785 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3786 if (const Expr *Modifier = C->getModifier()) {
3787 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3788 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3789 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3790 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3791 NumOfElements =
3792 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3793 }
3794 } else {
3795 NumAffinities += C->varlist_size();
3796 }
3797 }
3798 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3799 // Fields ids in kmp_task_affinity_info record.
3800 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3802 QualType KmpTaskAffinityInfoArrayTy;
3803 if (NumOfElements) {
3804 NumOfElements = CGF.Builder.CreateNUWAdd(
3805 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3806 auto *OVE = new (C) OpaqueValueExpr(
3807 Loc,
3808 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3809 VK_PRValue);
3810 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3811 RValue::get(NumOfElements));
3812 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3813 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3814 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3815 // Properly emit variable-sized array.
3816 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3817 ImplicitParamKind::Other);
3818 CGF.EmitVarDecl(*PD);
3819 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3820 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3821 /*isSigned=*/false);
3822 } else {
3823 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3824 KmpTaskAffinityInfoTy,
3825 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3826 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3827 AffinitiesArray =
3828 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3829 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3830 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3831 /*isSigned=*/false);
3832 }
3834 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3835 // Fill the array with entries from clauses that have no iterator modifier.
3836 unsigned Pos = 0;
3837 bool HasIterator = false;
3838 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3839 if (C->getModifier()) {
3840 HasIterator = true;
3841 continue;
3842 }
3843 for (const Expr *E : C->varlist()) {
3844 llvm::Value *Addr;
3845 llvm::Value *Size;
3846 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3847 LValue Base =
3848 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3849 KmpTaskAffinityInfoTy);
3850 // affs[i].base_addr = &<Affinities[i].second>;
3851 LValue BaseAddrLVal = CGF.EmitLValueForField(
3852 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3853 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3854 BaseAddrLVal);
3855 // affs[i].len = sizeof(<Affinities[i].second>);
3856 LValue LenLVal = CGF.EmitLValueForField(
3857 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3858 CGF.EmitStoreOfScalar(Size, LenLVal);
3859 ++Pos;
3860 }
3861 }
3862 LValue PosLVal;
3863 if (HasIterator) {
3864 PosLVal = CGF.MakeAddrLValue(
3865 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3866 C.getSizeType());
3867 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3868 }
3869 // Process elements with iterators.
3870 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3871 const Expr *Modifier = C->getModifier();
3872 if (!Modifier)
3873 continue;
3874 OMPIteratorGeneratorScope IteratorScope(
3875 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3876 for (const Expr *E : C->varlist()) {
3877 llvm::Value *Addr;
3878 llvm::Value *Size;
3879 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3880 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3881 LValue Base =
3882 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3883 KmpTaskAffinityInfoTy);
3884 // affs[i].base_addr = &<Affinities[i].second>;
3885 LValue BaseAddrLVal = CGF.EmitLValueForField(
3886 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3887 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3888 BaseAddrLVal);
3889 // affs[i].len = sizeof(<Affinities[i].second>);
3890 LValue LenLVal = CGF.EmitLValueForField(
3891 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3892 CGF.EmitStoreOfScalar(Size, LenLVal);
3893 Idx = CGF.Builder.CreateNUWAdd(
3894 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3895 CGF.EmitStoreOfScalar(Idx, PosLVal);
3896 }
3897 }
3898 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3899 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3900 // naffins, kmp_task_affinity_info_t *affin_list);
3901 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3902 llvm::Value *GTid = getThreadID(CGF, Loc);
3903 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3904 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3905 // FIXME: Emit the function and ignore its result for now until the
3906 // runtime function is properly implemented.
3907 (void)CGF.EmitRuntimeCall(
3908 OMPBuilder.getOrCreateRuntimeFunction(
3909 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3910 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3911 }
3912 llvm::Value *NewTaskNewTaskTTy =
3913 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3914 NewTask, KmpTaskTWithPrivatesPtrTy);
3915 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3916 KmpTaskTWithPrivatesQTy);
3917 LValue TDBase =
3918 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3919 // Fill the data in the resulting kmp_task_t record.
3920 // Copy shareds if there are any.
3921 Address KmpTaskSharedsPtr = Address::invalid();
3922 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3923 KmpTaskSharedsPtr = Address(
3924 CGF.EmitLoadOfScalar(
3925 CGF.EmitLValueForField(
3926 TDBase,
3927 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3928 Loc),
3929 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3930 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3931 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3932 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3933 }
3934 // Emit initial values for private copies (if any).
3935 TaskResultTy Result;
3936 if (!Privates.empty()) {
3937 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3938 SharedsTy, SharedsPtrTy, Data, Privates,
3939 /*ForDup=*/false);
3940 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3941 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3942 Result.TaskDupFn = emitTaskDupFunction(
3943 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3944 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3945 /*WithLastIter=*/!Data.LastprivateVars.empty());
3946 }
3947 }
3948 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3949 enum { Priority = 0, Destructors = 1 };
3950 // Provide pointer to function with destructors for privates.
3951 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3952 const RecordDecl *KmpCmplrdataUD =
3953 (*FI)->getType()->getAsUnionType()->getDecl();
3954 if (NeedsCleanup) {
3955 llvm::Value *DestructorFn = emitDestructorsFunction(
3956 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3957 KmpTaskTWithPrivatesQTy);
3958 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3959 LValue DestructorsLV = CGF.EmitLValueForField(
3960 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3961 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3962 DestructorFn, KmpRoutineEntryPtrTy),
3963 DestructorsLV);
3964 }
3965 // Set priority.
3966 if (Data.Priority.getInt()) {
3967 LValue Data2LV = CGF.EmitLValueForField(
3968 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3969 LValue PriorityLV = CGF.EmitLValueForField(
3970 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3971 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3972 }
3973 Result.NewTask = NewTask;
3974 Result.TaskEntry = TaskEntry;
3975 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3976 Result.TDBase = TDBase;
3977 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3978 return Result;
3979 }
3981 /// Translates internal dependency kind into the runtime kind.
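/// The resulting codes mirror the dependence flag bits the runtime expects in
/// kmp_depend_info.flags; 'out' and 'inout' deliberately share one code
/// because the runtime treats an out dependence as read-write.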
3982 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3983 RTLDependenceKindTy DepKind;
3984 switch (K) {
3985 case OMPC_DEPEND_in:
3986 DepKind = RTLDependenceKindTy::DepIn;
3987 break;
3988 // Out and InOut dependencies must use the same code.
3989 case OMPC_DEPEND_out:
3990 case OMPC_DEPEND_inout:
3991 DepKind = RTLDependenceKindTy::DepInOut;
3992 break;
3993 case OMPC_DEPEND_mutexinoutset:
3994 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3995 break;
3996 case OMPC_DEPEND_inoutset:
3997 DepKind = RTLDependenceKindTy::DepInOutSet;
3998 break;
3999 case OMPC_DEPEND_outallmemory:
4000 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4001 break;
4002 case OMPC_DEPEND_source:
4003 case OMPC_DEPEND_sink:
4004 case OMPC_DEPEND_depobj:
4005 case OMPC_DEPEND_inoutallmemory:
4006 case OMPC_DEPEND_unknown:
4007 llvm_unreachable("Unknown task dependence type");
4008 }
4009 return DepKind;
4010 }
4012 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
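/// A sketch of the record, approximately the runtime's
/// \code
/// typedef struct kmp_depend_info {
///   kmp_intptr_t base_addr;
///   size_t len;
///   kmp_uint8 flags; // a bit-field struct in kmp.h; a bool-width int here
/// } kmp_depend_info_t;
/// \endcode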
4013 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4014 QualType &FlagsTy) {
4015 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4016 if (KmpDependInfoTy.isNull()) {
4017 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4018 KmpDependInfoRD->startDefinition();
4019 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4020 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4021 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4022 KmpDependInfoRD->completeDefinition();
4023 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4024 }
4025 }
4027 std::pair<llvm::Value *, LValue>
4028 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4029 SourceLocation Loc) {
4030 ASTContext &C = CGM.getContext();
4031 QualType FlagsTy;
4032 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4033 RecordDecl *KmpDependInfoRD =
4034 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4035 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4036 LValue Base = CGF.EmitLoadOfPointerLValue(
4037 DepobjLVal.getAddress().withElementType(
4038 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4039 KmpDependInfoPtrTy->castAs<PointerType>());
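// A depobj handle points at element 0 of its kmp_depend_info array; the slot
// at index -1 is a header whose base_addr field stores the element count, so
// step back one element to read the number of dependencies.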
4040 Address DepObjAddr = CGF.Builder.CreateGEP(
4041 CGF, Base.getAddress(),
4042 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4043 LValue NumDepsBase = CGF.MakeAddrLValue(
4044 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4045 // NumDeps = deps[i].base_addr;
4046 LValue BaseAddrLVal = CGF.EmitLValueForField(
4047 NumDepsBase,
4048 *std::next(KmpDependInfoRD->field_begin(),
4049 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4050 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4051 return std::make_pair(NumDeps, Base);
4052 }
4054 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4055 llvm::PointerUnion<unsigned *, LValue *> Pos,
4056 const OMPTaskDataTy::DependData &Data,
4057 Address DependenciesArray) {
4058 CodeGenModule &CGM = CGF.CGM;
4059 ASTContext &C = CGM.getContext();
4060 QualType FlagsTy;
4061 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4062 RecordDecl *KmpDependInfoRD =
4063 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4064 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4066 OMPIteratorGeneratorScope IteratorScope(
4067 CGF, cast_or_null<OMPIteratorExpr>(
4068 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4069 : nullptr));
4070 for (const Expr *E : Data.DepExprs) {
4071 llvm::Value *Addr;
4072 llvm::Value *Size;
4074 // The expression will be a nullptr in the 'omp_all_memory' case.
4075 if (E) {
4076 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4077 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4078 } else {
4079 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4080 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4081 }
4082 LValue Base;
4083 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4084 Base = CGF.MakeAddrLValue(
4085 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4086 } else {
4087 assert(E && "Expected a non-null expression");
4088 LValue &PosLVal = *Pos.get<LValue *>();
4089 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4090 Base = CGF.MakeAddrLValue(
4091 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4092 }
4093 // deps[i].base_addr = &<Dependencies[i].second>;
4094 LValue BaseAddrLVal = CGF.EmitLValueForField(
4095 Base,
4096 *std::next(KmpDependInfoRD->field_begin(),
4097 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4098 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4099 // deps[i].len = sizeof(<Dependencies[i].second>);
4100 LValue LenLVal = CGF.EmitLValueForField(
4101 Base, *std::next(KmpDependInfoRD->field_begin(),
4102 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4103 CGF.EmitStoreOfScalar(Size, LenLVal);
4104 // deps[i].flags = <Dependencies[i].first>;
4105 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4106 LValue FlagsLVal = CGF.EmitLValueForField(
4107 Base,
4108 *std::next(KmpDependInfoRD->field_begin(),
4109 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4110 CGF.EmitStoreOfScalar(
4111 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4112 FlagsLVal);
4113 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4114 ++(*P);
4115 } else {
4116 LValue &PosLVal = *Pos.get<LValue *>();
4117 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4118 Idx = CGF.Builder.CreateNUWAdd(Idx,
4119 llvm::ConstantInt::get(Idx->getType(), 1));
4120 CGF.EmitStoreOfScalar(Idx, PosLVal);
4121 }
4122 }
4123 }
4125 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4126 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4127 const OMPTaskDataTy::DependData &Data) {
4128 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4129 "Expected depobj dependency kind.");
4130 SmallVector<llvm::Value *, 4> Sizes;
4131 SmallVector<LValue, 4> SizeLVals;
4132 ASTContext &C = CGF.getContext();
4134 OMPIteratorGeneratorScope IteratorScope(
4135 CGF, cast_or_null<OMPIteratorExpr>(
4136 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4137 : nullptr));
4138 for (const Expr *E : Data.DepExprs) {
4139 llvm::Value *NumDeps;
4140 LValue Base;
4141 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4142 std::tie(NumDeps, Base) =
4143 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4144 LValue NumLVal = CGF.MakeAddrLValue(
4145 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4146 C.getUIntPtrType());
4147 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4148 NumLVal.getAddress());
4149 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4150 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4151 CGF.EmitStoreOfScalar(Add, NumLVal);
4152 SizeLVals.push_back(NumLVal);
4153 }
4155 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4156 llvm::Value *Size =
4157 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4158 Sizes.push_back(Size);
4159 }
4160 return Sizes;
4161 }
4163 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4164 QualType &KmpDependInfoTy,
4165 LValue PosLVal,
4166 const OMPTaskDataTy::DependData &Data,
4167 Address DependenciesArray) {
4168 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4169 "Expected depobj dependency kind.");
4170 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4172 OMPIteratorGeneratorScope IteratorScope(
4173 CGF, cast_or_null<OMPIteratorExpr>(
4174 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4175 : nullptr));
4176 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4177 const Expr *E = Data.DepExprs[I];
4178 llvm::Value *NumDeps;
4179 LValue Base;
4180 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4181 std::tie(NumDeps, Base) =
4182 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4184 // memcpy the dependency data.
4185 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4186 ElSize,
4187 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4188 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4189 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4190 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4192 // Increase pos.
4193 // pos += size;
4194 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4195 CGF.EmitStoreOfScalar(Add, PosLVal);
4196 }
4197 }
4200 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4201 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4202 SourceLocation Loc) {
4203 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4204 return D.DepExprs.empty();
4205 }))
4206 return std::make_pair(nullptr, Address::invalid());
4207 // Process list of dependencies.
4208 ASTContext &C = CGM.getContext();
4209 Address DependenciesArray = Address::invalid();
4210 llvm::Value *NumOfElements = nullptr;
4211 unsigned NumDependencies = std::accumulate(
4212 Dependencies.begin(), Dependencies.end(), 0,
4213 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4214 return D.DepKind == OMPC_DEPEND_depobj
4215 ? V
4216 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4217 });
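// Note: depobj and iterator-modified clauses contribute nothing here; their
// element counts are only known at runtime and are folded into NumOfElements
// below.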
4218 QualType FlagsTy;
4219 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4220 bool HasDepobjDeps = false;
4221 bool HasRegularWithIterators = false;
4222 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4223 llvm::Value *NumOfRegularWithIterators =
4224 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4225 // Calculate number of depobj dependencies and regular deps with the
4226 // iterators.
4227 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4228 if (D.DepKind == OMPC_DEPEND_depobj) {
4229 SmallVector<llvm::Value *, 4> Sizes =
4230 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4231 for (llvm::Value *Size : Sizes) {
4232 NumOfDepobjElements =
4233 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4234 }
4235 HasDepobjDeps = true;
4236 continue;
4237 }
4238 // Include number of iterations, if any.
4240 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4241 llvm::Value *ClauseIteratorSpace =
4242 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4243 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4244 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4245 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4246 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4247 }
4248 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4249 ClauseIteratorSpace,
4250 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4251 NumOfRegularWithIterators =
4252 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4253 HasRegularWithIterators = true;
4254 continue;
4255 }
4256 }
4258 QualType KmpDependInfoArrayTy;
4259 if (HasDepobjDeps || HasRegularWithIterators) {
4260 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4261 /*isSigned=*/false);
4262 if (HasDepobjDeps) {
4263 NumOfElements =
4264 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4265 }
4266 if (HasRegularWithIterators) {
4267 NumOfElements =
4268 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4269 }
4270 auto *OVE = new (C) OpaqueValueExpr(
4271 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4272 VK_PRValue);
4273 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4274 RValue::get(NumOfElements));
4275 KmpDependInfoArrayTy =
4276 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4277 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4278 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4279 // Properly emit variable-sized array.
4280 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4281 ImplicitParamKind::Other);
4282 CGF.EmitVarDecl(*PD);
4283 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4284 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4285 /*isSigned=*/false);
4286 } else {
4287 KmpDependInfoArrayTy = C.getConstantArrayType(
4288 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4289 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4290 DependenciesArray =
4291 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4292 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4293 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4294 /*isSigned=*/false);
4295 }
4296 unsigned Pos = 0;
4297 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4298 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4299 Dependencies[I].IteratorExpr)
4300 continue;
4301 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4302 DependenciesArray);
4303 }
4304 // Copy regular dependencies with iterators.
4305 LValue PosLVal = CGF.MakeAddrLValue(
4306 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4307 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4308 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4309 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4310 !Dependencies[I].IteratorExpr)
4311 continue;
4312 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4313 DependenciesArray);
4314 }
4315 // Copy final depobj arrays without iterators.
4316 if (HasDepobjDeps) {
4317 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4318 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4319 continue;
4320 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4321 DependenciesArray);
4322 }
4323 }
4324 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4325 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4326 return std::make_pair(NumOfElements, DependenciesArray);
4327 }
4329 Address CGOpenMPRuntime::emitDepobjDependClause(
4330 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4331 SourceLocation Loc) {
4332 if (Dependencies.DepExprs.empty())
4333 return Address::invalid();
4334 // Process list of dependencies.
4335 ASTContext &C = CGM.getContext();
4336 Address DependenciesArray = Address::invalid();
4337 unsigned NumDependencies = Dependencies.DepExprs.size();
4338 QualType FlagsTy;
4339 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4340 RecordDecl *KmpDependInfoRD =
4341 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4343 llvm::Value *Size;
4344 // Define type kmp_depend_info[<Dependencies.size()>];
4345 // For depobj reserve one extra element to store the number of elements.
4346 // This is required to handle the 'depobj(x) update(in)' construct.
4347 // kmp_depend_info[<Dependencies.size()>] deps;
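// Illustrative sketch (not part of the original source): for
//   #pragma omp depobj(o) depend(inout: x)
// the allocated storage looks roughly like
//   deps[0].base_addr = 1;            // element count, read by 'update'/'destroy'
//   deps[1] = {&x, sizeof(x), inout-flag};
// and the depobj handle stored in 'o' points at &deps[1] (see the ConstGEP at
// the end of this function).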
4348 llvm::Value *NumDepsVal;
4349 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4350 if (const auto *IE =
4351 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4352 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4353 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4354 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4355 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4356 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4357 }
4358 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4359 NumDepsVal);
4360 CharUnits SizeInBytes =
4361 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4362 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4363 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4364 NumDepsVal =
4365 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4366 } else {
4367 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4368 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4369 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4370 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4371 Size = CGM.getSize(Sz.alignTo(Align));
4372 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4373 }
4374 // Needs to be allocated in dynamic memory.
4375 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4376 // Use default allocator.
4377 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4378 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4380 llvm::Value *Addr =
4381 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4382 CGM.getModule(), OMPRTL___kmpc_alloc),
4383 Args, ".dep.arr.addr");
4384 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4385 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4386 Addr, CGF.Builder.getPtrTy(0));
4387 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4388 // Write number of elements in the first element of array for depobj.
4389 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4390 // deps[0].base_addr = NumDependencies;
4391 LValue BaseAddrLVal = CGF.EmitLValueForField(
4392 Base,
4393 *std::next(KmpDependInfoRD->field_begin(),
4394 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4395 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4396 llvm::PointerUnion<unsigned *, LValue *> Pos;
4397 unsigned Idx = 1;
4398 LValue PosLVal;
4399 if (Dependencies.IteratorExpr) {
4400 PosLVal = CGF.MakeAddrLValue(
4401 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4402 C.getSizeType());
4403 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4404 /*IsInit=*/true);
4405 Pos = &PosLVal;
4406 } else {
4407 Pos = &Idx;
4408 }
4409 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4410 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4411 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4412 CGF.Int8Ty);
4413 return DependenciesArray;
4414 }
4416 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4417 SourceLocation Loc) {
4418 ASTContext &C = CGM.getContext();
4419 QualType FlagsTy;
4420 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4421 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4422 C.VoidPtrTy.castAs<PointerType>());
4423 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4424 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4425 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4426 CGF.ConvertTypeForMem(KmpDependInfoTy));
4427 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4428 Addr.getElementType(), Addr.emitRawPointer(CGF),
4429 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4430 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4431 CGF.VoidPtrTy);
4432 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4433 // Use default allocator.
4434 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4435 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4437 // __kmpc_free(gtid, addr, nullptr);
4438 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4439 CGM.getModule(), OMPRTL___kmpc_free),
4440 Args);
4441 }
4443 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4444 OpenMPDependClauseKind NewDepKind,
4445 SourceLocation Loc) {
4446 ASTContext &C = CGM.getContext();
4447 QualType FlagsTy;
4448 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4449 RecordDecl *KmpDependInfoRD =
4450 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4451 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4452 llvm::Value *NumDeps;
4453 LValue Base;
4454 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4456 Address Begin = Base.getAddress();
4457 // Cast from pointer to array type to pointer to single element.
4458 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4459 Begin.emitRawPointer(CGF), NumDeps);
4460 // The basic structure here is a while-do loop.
4461 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4462 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4463 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4464 CGF.EmitBlock(BodyBB);
4465 llvm::PHINode *ElementPHI =
4466 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4467 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4468 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4469 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4470 Base.getTBAAInfo());
4471 // deps[i].flags = NewDepKind;
4472 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4473 LValue FlagsLVal = CGF.EmitLValueForField(
4474 Base, *std::next(KmpDependInfoRD->field_begin(),
4475 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4476 CGF.EmitStoreOfScalar(
4477 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4478 FlagsLVal);
4480 // Shift the address forward by one element.
4481 llvm::Value *ElementNext =
4482 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4483 .emitRawPointer(CGF);
4484 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4485 llvm::Value *IsEmpty =
4486 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4487 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4488 // Done.
4489 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4490 }
4492 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4493 const OMPExecutableDirective &D,
4494 llvm::Function *TaskFunction,
4495 QualType SharedsTy, Address Shareds,
4496 const Expr *IfCond,
4497 const OMPTaskDataTy &Data) {
4498 if (!CGF.HaveInsertPoint())
4499 return;
4501 TaskResultTy Result =
4502 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4503 llvm::Value *NewTask = Result.NewTask;
4504 llvm::Function *TaskEntry = Result.TaskEntry;
4505 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4506 LValue TDBase = Result.TDBase;
4507 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4508 // Process list of dependences.
4509 Address DependenciesArray = Address::invalid();
4510 llvm::Value *NumOfElements;
4511 std::tie(NumOfElements, DependenciesArray) =
4512 emitDependClause(CGF, Data.Dependences, Loc);
4514 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4515 // libcall.
4516 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4517 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4518 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4519 // list is not empty
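// For example (illustrative only), '#pragma omp task depend(in: a)' emits
// roughly:
//   __kmpc_omp_task_with_deps(&loc, gtid, new_task, /*ndeps=*/1, dep_list,
//                             /*ndeps_noalias=*/0, /*noalias_dep_list=*/NULL);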
4520 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4521 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4522 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4523 llvm::Value *DepTaskArgs[7];
4524 if (!Data.Dependences.empty()) {
4525 DepTaskArgs[0] = UpLoc;
4526 DepTaskArgs[1] = ThreadID;
4527 DepTaskArgs[2] = NewTask;
4528 DepTaskArgs[3] = NumOfElements;
4529 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4530 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4531 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4532 }
4533 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4534 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4535 if (!Data.Tied) {
4536 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4537 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4538 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4539 }
4540 if (!Data.Dependences.empty()) {
4541 CGF.EmitRuntimeCall(
4542 OMPBuilder.getOrCreateRuntimeFunction(
4543 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4544 DepTaskArgs);
4545 } else {
4546 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4547 CGM.getModule(), OMPRTL___kmpc_omp_task),
4548 TaskArgs);
4549 }
4550 // Check if parent region is untied and build return for untied task.
4551 if (auto *Region =
4552 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4553 Region->emitUntiedSwitch(CGF);
4554 };
4556 llvm::Value *DepWaitTaskArgs[7];
4557 if (!Data.Dependences.empty()) {
4558 DepWaitTaskArgs[0] = UpLoc;
4559 DepWaitTaskArgs[1] = ThreadID;
4560 DepWaitTaskArgs[2] = NumOfElements;
4561 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4562 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4563 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4564 DepWaitTaskArgs[6] =
4565 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4566 }
4567 auto &M = CGM.getModule();
4568 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4569 TaskEntry, &Data, &DepWaitTaskArgs,
4570 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4571 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4572 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4573 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4574 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4575 // is specified.
4576 if (!Data.Dependences.empty())
4577 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4578 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4579 DepWaitTaskArgs);
4580 // Call proxy_task_entry(gtid, new_task);
4581 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4582 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4583 Action.Enter(CGF);
4584 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4585 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4586 OutlinedFnArgs);
4587 };
4589 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4590 // kmp_task_t *new_task);
4591 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4592 // kmp_task_t *new_task);
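// Illustrative sequence for the 'if' clause evaluating to false (a sketch,
// not verbatim IR): the task body runs immediately on this thread, bracketed
// by the two runtime calls:
//   __kmpc_omp_task_begin_if0(&loc, gtid, new_task);
//   proxy_task_entry(gtid, new_task);
//   __kmpc_omp_task_complete_if0(&loc, gtid, new_task);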
4593 RegionCodeGenTy RCG(CodeGen);
4594 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4595 M, OMPRTL___kmpc_omp_task_begin_if0),
4596 TaskArgs,
4597 OMPBuilder.getOrCreateRuntimeFunction(
4598 M, OMPRTL___kmpc_omp_task_complete_if0),
4599 TaskArgs);
4600 RCG.setAction(Action);
4601 RCG(CGF);
4602 };
4604 if (IfCond) {
4605 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4606 } else {
4607 RegionCodeGenTy ThenRCG(ThenCodeGen);
4608 ThenRCG(CGF);
4609 }
4610 }
4612 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4613 const OMPLoopDirective &D,
4614 llvm::Function *TaskFunction,
4615 QualType SharedsTy, Address Shareds,
4616 const Expr *IfCond,
4617 const OMPTaskDataTy &Data) {
4618 if (!CGF.HaveInsertPoint())
4619 return;
4620 TaskResultTy Result =
4621 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4622 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4623 // libcall.
4624 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4625 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4626 // sched, kmp_uint64 grainsize, void *task_dup);
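// For example (illustrative only): 'num_tasks(8)' passes sched=2 (NumTasks)
// and grainsize=8, 'grainsize(4)' passes sched=1 (Grainsize) and grainsize=4,
// and no schedule clause passes sched=0 and grainsize=0 (see the enum below).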
4627 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4628 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4629 llvm::Value *IfVal;
4630 if (IfCond) {
4631 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4632 /*isSigned=*/true);
4633 } else {
4634 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4635 }
4637 LValue LBLVal = CGF.EmitLValueForField(
4638 Result.TDBase,
4639 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4640 const auto *LBVar =
4641 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4642 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4643 /*IsInitializer=*/true);
4644 LValue UBLVal = CGF.EmitLValueForField(
4645 Result.TDBase,
4646 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4647 const auto *UBVar =
4648 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4649 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4650 /*IsInitializer=*/true);
4651 LValue StLVal = CGF.EmitLValueForField(
4652 Result.TDBase,
4653 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4654 const auto *StVar =
4655 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4656 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4657 /*IsInitializer=*/true);
4658 // Store reductions address.
4659 LValue RedLVal = CGF.EmitLValueForField(
4660 Result.TDBase,
4661 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4662 if (Data.Reductions) {
4663 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4664 } else {
4665 CGF.EmitNullInitialization(RedLVal.getAddress(),
4666 CGF.getContext().VoidPtrTy);
4667 }
4668 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4669 llvm::Value *TaskArgs[] = {
4670 UpLoc,
4671 ThreadID,
4672 Result.NewTask,
4673 IfVal,
4674 LBLVal.getPointer(CGF),
4675 UBLVal.getPointer(CGF),
4676 CGF.EmitLoadOfScalar(StLVal, Loc),
4677 llvm::ConstantInt::getSigned(
4678 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4679 llvm::ConstantInt::getSigned(
4680 CGF.IntTy, Data.Schedule.getPointer()
4681 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4682 : NoSchedule),
4683 Data.Schedule.getPointer()
4684 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4685 /*isSigned=*/false)
4686 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4687 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4688 Result.TaskDupFn, CGF.VoidPtrTy)
4689 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4690 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4691 CGM.getModule(), OMPRTL___kmpc_taskloop),
4692 TaskArgs);
4693 }
4695 /// Emit reduction operation for each element of array (required for
4696 /// array sections) LHS op = RHS.
4697 /// \param Type Type of array.
4698 /// \param LHSVar Variable on the left side of the reduction operation
4699 /// (references element of array in original variable).
4700 /// \param RHSVar Variable on the right side of the reduction operation
4701 /// (references element of array in original variable).
4702 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4703 /// RHSVar.
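/// As a hedged illustration, for 'reduction(+: a[0:n])' the emitted loop is
/// conceptually:
/// \code
/// for (i = 0; i < n; ++i)
///   lhs[i] = lhs[i] + rhs[i]; // RedOpGen emits the per-element combine
/// \endcode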
4704 static void EmitOMPAggregateReduction(
4705 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4706 const VarDecl *RHSVar,
4707 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4708 const Expr *, const Expr *)> &RedOpGen,
4709 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4710 const Expr *UpExpr = nullptr) {
4711 // Perform element-by-element reduction.
4712 QualType ElementTy;
4713 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4714 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4716 // Drill down to the base element type on both arrays.
4717 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4718 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4720 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4721 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4722 // Cast from pointer to array type to pointer to single element.
4723 llvm::Value *LHSEnd =
4724 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4725 // The basic structure here is a while-do loop.
4726 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4727 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4728 llvm::Value *IsEmpty =
4729 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4730 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4732 // Enter the loop body, making that address the current address.
4733 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4734 CGF.EmitBlock(BodyBB);
4736 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4738 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4739 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4740 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4741 Address RHSElementCurrent(
4742 RHSElementPHI, RHSAddr.getElementType(),
4743 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4745 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4746 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4747 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4748 Address LHSElementCurrent(
4749 LHSElementPHI, LHSAddr.getElementType(),
4750 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4752 // Emit copy.
4753 CodeGenFunction::OMPPrivateScope Scope(CGF);
4754 Scope.addPrivate(LHSVar, LHSElementCurrent);
4755 Scope.addPrivate(RHSVar, RHSElementCurrent);
4756 Scope.Privatize();
4757 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4758 Scope.ForceCleanup();
4760 // Shift the address forward by one element.
4761 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4762 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4763 "omp.arraycpy.dest.element");
4764 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4765 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4766 "omp.arraycpy.src.element");
4767 // Check whether we've reached the end.
4768 llvm::Value *Done =
4769 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4770 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4771 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4772 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4774 // Done.
4775 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4776 }
4778 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4779 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4780 /// UDR combiner function.
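/// E.g. (illustrative), for a user-defined reduction such as
/// \code
/// #pragma omp declare reduction(myadd : MyType : omp_out += omp_in)
/// \endcode
/// the ReductionOp is a call expression whose opaque callee is remapped
/// below to the emitted UDR combiner function.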
4781 static void emitReductionCombiner(CodeGenFunction &CGF,
4782 const Expr *ReductionOp) {
4783 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4784 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4785 if (const auto *DRE =
4786 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4787 if (const auto *DRD =
4788 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4789 std::pair<llvm::Function *, llvm::Function *> Reduction =
4790 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4791 RValue Func = RValue::get(Reduction.first);
4792 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4793 CGF.EmitIgnoredExpr(ReductionOp);
4794 return;
4795 }
4796 CGF.EmitIgnoredExpr(ReductionOp);
4797 }
4799 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4800 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4801 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4802 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4803 ASTContext &C = CGM.getContext();
4805 // void reduction_func(void *LHSArg, void *RHSArg);
4806 FunctionArgList Args;
4807 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4808 ImplicitParamKind::Other);
4809 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4810 ImplicitParamKind::Other);
4811 Args.push_back(&LHSArg);
4812 Args.push_back(&RHSArg);
4813 const auto &CGFI =
4814 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4815 std::string Name = getReductionFuncName(ReducerName);
4816 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4817 llvm::GlobalValue::InternalLinkage, Name,
4818 &CGM.getModule());
4819 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4820 Fn->setDoesNotRecurse();
4821 CodeGenFunction CGF(CGM);
4822 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4824 // Dst = (void*[n])(LHSArg);
4825 // Src = (void*[n])(RHSArg);
4826 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4827 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4828 CGF.Builder.getPtrTy(0)),
4829 ArgsElemType, CGF.getPointerAlign());
4830 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4831 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4832 CGF.Builder.getPtrTy(0)),
4833 ArgsElemType, CGF.getPointerAlign());
4835 // ...
4836 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4837 // ...
4838 CodeGenFunction::OMPPrivateScope Scope(CGF);
4839 const auto *IPriv = Privates.begin();
4840 unsigned Idx = 0;
4841 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4842 const auto *RHSVar =
4843 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4844 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4845 const auto *LHSVar =
4846 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4847 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4848 QualType PrivTy = (*IPriv)->getType();
4849 if (PrivTy->isVariablyModifiedType()) {
4850 // Get array size and emit VLA type.
4851 ++Idx;
4852 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4853 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4854 const VariableArrayType *VLA =
4855 CGF.getContext().getAsVariableArrayType(PrivTy);
4856 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4857 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4858 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4859 CGF.EmitVariablyModifiedType(PrivTy);
4860 }
4861 }
4862 Scope.Privatize();
4863 IPriv = Privates.begin();
4864 const auto *ILHS = LHSExprs.begin();
4865 const auto *IRHS = RHSExprs.begin();
4866 for (const Expr *E : ReductionOps) {
4867 if ((*IPriv)->getType()->isArrayType()) {
4868 // Emit reduction for array section.
4869 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4870 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4871 EmitOMPAggregateReduction(
4872 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4873 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4874 emitReductionCombiner(CGF, E);
4875 });
4876 } else {
4877 // Emit reduction for array subscript or single variable.
4878 emitReductionCombiner(CGF, E);
4880 ++IPriv;
4881 ++ILHS;
4882 ++IRHS;
4883 }
4884 Scope.ForceCleanup();
4885 CGF.FinishFunction();
4886 return Fn;
4887 }
4889 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4890 const Expr *ReductionOp,
4891 const Expr *PrivateRef,
4892 const DeclRefExpr *LHS,
4893 const DeclRefExpr *RHS) {
4894 if (PrivateRef->getType()->isArrayType()) {
4895 // Emit reduction for array section.
4896 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4897 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4898 EmitOMPAggregateReduction(
4899 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4900 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4901 emitReductionCombiner(CGF, ReductionOp);
4902 });
4903 } else {
4904 // Emit reduction for array subscript or single variable.
4905 emitReductionCombiner(CGF, ReductionOp);
4906 }
4907 }
4909 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4910 ArrayRef<const Expr *> Privates,
4911 ArrayRef<const Expr *> LHSExprs,
4912 ArrayRef<const Expr *> RHSExprs,
4913 ArrayRef<const Expr *> ReductionOps,
4914 ReductionOptionsTy Options) {
4915 if (!CGF.HaveInsertPoint())
4916 return;
4918 bool WithNowait = Options.WithNowait;
4919 bool SimpleReduction = Options.SimpleReduction;
4921 // The following code is emitted for the reduction:
4923 // static kmp_critical_name lock = { 0 };
4925 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4926 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4927 // ...
4928 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4929 // *(Type<n>-1*)rhs[<n>-1]);
4930 // }
4932 // ...
4933 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4934 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4935 // RedList, reduce_func, &<lock>)) {
4936 // case 1:
4937 // ...
4938 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4939 // ...
4940 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4941 // break;
4942 // case 2:
4943 // ...
4944 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4945 // ...
4946 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4947 // break;
4948 // default:;
4949 // }
4951 // If SimpleReduction is true, only the following code is generated:
4952 // ...
4953 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4954 // ...
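// E.g. (illustrative) for '#pragma omp parallel for reduction(+: s)': case 1
// emits 's += s_priv;' followed by __kmpc_end_reduce{_nowait}, while case 2
// emits an atomic 's += s_priv;'.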
4956 ASTContext &C = CGM.getContext();
4958 if (SimpleReduction) {
4959 CodeGenFunction::RunCleanupsScope Scope(CGF);
4960 const auto *IPriv = Privates.begin();
4961 const auto *ILHS = LHSExprs.begin();
4962 const auto *IRHS = RHSExprs.begin();
4963 for (const Expr *E : ReductionOps) {
4964 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4965 cast<DeclRefExpr>(*IRHS));
4966 ++IPriv;
4967 ++ILHS;
4968 ++IRHS;
4969 }
4970 return;
4971 }
4973 // 1. Build a list of reduction variables.
4974 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4975 auto Size = RHSExprs.size();
4976 for (const Expr *E : Privates) {
4977 if (E->getType()->isVariablyModifiedType())
4978 // Reserve place for array size.
4979 ++Size;
4980 }
4981 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4982 QualType ReductionArrayTy = C.getConstantArrayType(
4983 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4984 /*IndexTypeQuals=*/0);
4985 RawAddress ReductionList =
4986 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4987 const auto *IPriv = Privates.begin();
4988 unsigned Idx = 0;
4989 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4990 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4991 CGF.Builder.CreateStore(
4992 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4993 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
4994 Elem);
4995 if ((*IPriv)->getType()->isVariablyModifiedType()) {
4996 // Store array size.
4997 ++Idx;
4998 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4999 llvm::Value *Size = CGF.Builder.CreateIntCast(
5000 CGF.getVLASize(
5001 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5002 .NumElts,
5003 CGF.SizeTy, /*isSigned=*/false);
5004 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5005 Elem);
5006 }
5007 }
5009 // 2. Emit reduce_func().
5010 llvm::Function *ReductionFn = emitReductionFunction(
5011 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5012 Privates, LHSExprs, RHSExprs, ReductionOps);
5014 // 3. Create static kmp_critical_name lock = { 0 };
5015 std::string Name = getName({"reduction"});
5016 llvm::Value *Lock = getCriticalRegionLock(Name);
5018 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5019 // RedList, reduce_func, &<lock>);
5020 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5021 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5022 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5023 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5024 ReductionList.getPointer(), CGF.VoidPtrTy);
5025 llvm::Value *Args[] = {
5026 IdentTLoc, // ident_t *<loc>
5027 ThreadId, // i32 <gtid>
5028 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5029 ReductionArrayTySize, // size_type sizeof(RedList)
5030 RL, // void *RedList
5031 ReductionFn, // void (*) (void *, void *) <reduce_func>
5032 Lock // kmp_critical_name *&<lock>
5033 };
5034 llvm::Value *Res = CGF.EmitRuntimeCall(
5035 OMPBuilder.getOrCreateRuntimeFunction(
5036 CGM.getModule(),
5037 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5038 Args);
5040 // 5. Build switch(res)
5041 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5042 llvm::SwitchInst *SwInst =
5043 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5045 // 6. Build case 1:
5046 // ...
5047 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5048 // ...
5049 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5050 // break;
5051 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5052 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5053 CGF.EmitBlock(Case1BB);
5055 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5056 llvm::Value *EndArgs[] = {
5057 IdentTLoc, // ident_t *<loc>
5058 ThreadId, // i32 <gtid>
5059 Lock // kmp_critical_name *&<lock>
5060 };
5061 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5062 CodeGenFunction &CGF, PrePostActionTy &Action) {
5063 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5064 const auto *IPriv = Privates.begin();
5065 const auto *ILHS = LHSExprs.begin();
5066 const auto *IRHS = RHSExprs.begin();
5067 for (const Expr *E : ReductionOps) {
5068 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5069 cast<DeclRefExpr>(*IRHS));
5070 ++IPriv;
5071 ++ILHS;
5072 ++IRHS;
5073 }
5074 };
5075 RegionCodeGenTy RCG(CodeGen);
5076 CommonActionTy Action(
5077 nullptr, {},
5078 OMPBuilder.getOrCreateRuntimeFunction(
5079 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5080 : OMPRTL___kmpc_end_reduce),
5081 EndArgs);
5082 RCG.setAction(Action);
5083 RCG(CGF);
5085 CGF.EmitBranch(DefaultBB);
5087 // 7. Build case 2:
5088 // ...
5089 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5090 // ...
5091 // break;
5092 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5093 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5094 CGF.EmitBlock(Case2BB);
5096 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5097 CodeGenFunction &CGF, PrePostActionTy &Action) {
5098 const auto *ILHS = LHSExprs.begin();
5099 const auto *IRHS = RHSExprs.begin();
5100 const auto *IPriv = Privates.begin();
5101 for (const Expr *E : ReductionOps) {
5102 const Expr *XExpr = nullptr;
5103 const Expr *EExpr = nullptr;
5104 const Expr *UpExpr = nullptr;
5105 BinaryOperatorKind BO = BO_Comma;
5106 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5107 if (BO->getOpcode() == BO_Assign) {
5108 XExpr = BO->getLHS();
5109 UpExpr = BO->getRHS();
5110 }
5111 }
5112 // Try to emit update expression as a simple atomic.
5113 const Expr *RHSExpr = UpExpr;
5114 if (RHSExpr) {
5115 // Analyze RHS part of the whole expression.
5116 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5117 RHSExpr->IgnoreParenImpCasts())) {
5118 // If this is a conditional operator, analyze its condition for
5119 // min/max reduction operator.
5120 RHSExpr = ACO->getCond();
5122 if (const auto *BORHS =
5123 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5124 EExpr = BORHS->getRHS();
5125 BO = BORHS->getOpcode();
5126 }
5127 }
5128 if (XExpr) {
5129 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5130 auto &&AtomicRedGen = [BO, VD,
5131 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5132 const Expr *EExpr, const Expr *UpExpr) {
5133 LValue X = CGF.EmitLValue(XExpr);
5134 RValue E;
5135 if (EExpr)
5136 E = CGF.EmitAnyExpr(EExpr);
5137 CGF.EmitOMPAtomicSimpleUpdateExpr(
5138 X, E, BO, /*IsXLHSInRHSPart=*/true,
5139 llvm::AtomicOrdering::Monotonic, Loc,
5140 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5141 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5142 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5143 CGF.emitOMPSimpleStore(
5144 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5145 VD->getType().getNonReferenceType(), Loc);
5146 PrivateScope.addPrivate(VD, LHSTemp);
5147 (void)PrivateScope.Privatize();
5148 return CGF.EmitAnyExpr(UpExpr);
5149 });
5150 };
5151 if ((*IPriv)->getType()->isArrayType()) {
5152 // Emit atomic reduction for array section.
5153 const auto *RHSVar =
5154 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5155 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5156 AtomicRedGen, XExpr, EExpr, UpExpr);
5157 } else {
5158 // Emit atomic reduction for array subscript or single variable.
5159 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5160 }
5161 } else {
5162 // Emit as a critical region.
5163 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5164 const Expr *, const Expr *) {
5165 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5166 std::string Name = RT.getName({"atomic_reduction"});
5167 RT.emitCriticalRegion(
5168 CGF, Name,
5169 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5170 Action.Enter(CGF);
5171 emitReductionCombiner(CGF, E);
5172 },
5173 Loc);
5174 };
5175 if ((*IPriv)->getType()->isArrayType()) {
5176 const auto *LHSVar =
5177 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5178 const auto *RHSVar =
5179 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5180 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5181 CritRedGen);
5182 } else {
5183 CritRedGen(CGF, nullptr, nullptr, nullptr);
5184 }
5185 }
5186 ++ILHS;
5187 ++IRHS;
5188 ++IPriv;
5189 }
5190 };
5191 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5192 if (!WithNowait) {
5193 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5194 llvm::Value *EndArgs[] = {
5195 IdentTLoc, // ident_t *<loc>
5196 ThreadId, // i32 <gtid>
5197 Lock // kmp_critical_name *&<lock>
5198 };
5199 CommonActionTy Action(nullptr, {},
5200 OMPBuilder.getOrCreateRuntimeFunction(
5201 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5202 EndArgs);
5203 AtomicRCG.setAction(Action);
5204 AtomicRCG(CGF);
5205 } else {
5206 AtomicRCG(CGF);
5207 }
5209 CGF.EmitBranch(DefaultBB);
5210 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5211 }
5213 /// Generates a unique name for artificial threadprivate variables.
5214 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
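/// E.g. (hypothetical raw-encoding value), for prefix "reduction_size" and a
/// local variable 'x' this may yield a name like "reduction_size.x_12345".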
5215 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5216 const Expr *Ref) {
5217 SmallString<256> Buffer;
5218 llvm::raw_svector_ostream Out(Buffer);
5219 const clang::DeclRefExpr *DE;
5220 const VarDecl *D = ::getBaseDecl(Ref, DE);
5221 if (!D)
5222 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5223 D = D->getCanonicalDecl();
5224 std::string Name = CGM.getOpenMPRuntime().getName(
5225 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5226 Out << Prefix << Name << "_"
5227 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5228 return std::string(Out.str());
5229 }
5231 /// Emits reduction initializer function:
5232 /// \code
5233 /// void @.red_init(void* %arg, void* %orig) {
5234 /// %0 = bitcast void* %arg to <type>*
5235 /// store <type> <init>, <type>* %0
5236 /// ret void
5237 /// }
5238 /// \endcode
5239 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5240 SourceLocation Loc,
5241 ReductionCodeGen &RCG, unsigned N) {
5242 ASTContext &C = CGM.getContext();
5243 QualType VoidPtrTy = C.VoidPtrTy;
5244 VoidPtrTy.addRestrict();
5245 FunctionArgList Args;
5246 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5247 ImplicitParamKind::Other);
5248 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5249 ImplicitParamKind::Other);
5250 Args.emplace_back(&Param);
5251 Args.emplace_back(&ParamOrig);
5252 const auto &FnInfo =
5253 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5254 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5255 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5256 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5257 Name, &CGM.getModule());
5258 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5259 Fn->setDoesNotRecurse();
5260 CodeGenFunction CGF(CGM);
5261 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5262 QualType PrivateType = RCG.getPrivateType(N);
5263 Address PrivateAddr = CGF.EmitLoadOfPointer(
5264 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5265 C.getPointerType(PrivateType)->castAs<PointerType>());
5266 llvm::Value *Size = nullptr;
5267 // If the size of the reduction item is non-constant, load it from global
5268 // threadprivate variable.
5269 if (RCG.getSizes(N).second) {
5270 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5271 CGF, CGM.getContext().getSizeType(),
5272 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5273 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5274 CGM.getContext().getSizeType(), Loc);
5275 }
5276 RCG.emitAggregateType(CGF, N, Size);
5277 Address OrigAddr = Address::invalid();
5278 // If the initializer uses the initializer from a declare reduction
5279 // construct, emit a pointer to the address of the original reduction item
5280 // (required by the reduction initializer).
5281 if (RCG.usesReductionInitializer(N)) {
5282 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5283 OrigAddr = CGF.EmitLoadOfPointer(
5284 SharedAddr,
5285 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5286 }
5287 // Emit the initializer:
5288 // %0 = bitcast void* %arg to <type>*
5289 // store <type> <init>, <type>* %0
5290 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5291 [](CodeGenFunction &) { return false; });
5292 CGF.FinishFunction();
5293 return Fn;
5294 }
5296 /// Emits reduction combiner function:
5297 /// \code
5298 /// void @.red_comb(void* %arg0, void* %arg1) {
5299 /// %lhs = bitcast void* %arg0 to <type>*
5300 /// %rhs = bitcast void* %arg1 to <type>*
5301 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5302 /// store <type> %2, <type>* %lhs
5303 /// ret void
5304 /// }
5305 /// \endcode
5306 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5307 SourceLocation Loc,
5308 ReductionCodeGen &RCG, unsigned N,
5309 const Expr *ReductionOp,
5310 const Expr *LHS, const Expr *RHS,
5311 const Expr *PrivateRef) {
5312 ASTContext &C = CGM.getContext();
5313 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5314 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5315 FunctionArgList Args;
5316 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5317 C.VoidPtrTy, ImplicitParamKind::Other);
5318 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5319 ImplicitParamKind::Other);
5320 Args.emplace_back(&ParamInOut);
5321 Args.emplace_back(&ParamIn);
5322 const auto &FnInfo =
5323 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5324 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5325 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5326 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5327 Name, &CGM.getModule());
5328 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5329 Fn->setDoesNotRecurse();
5330 CodeGenFunction CGF(CGM);
5331 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5332 llvm::Value *Size = nullptr;
5333 // If the size of the reduction item is non-constant, load it from global
5334 // threadprivate variable.
5335 if (RCG.getSizes(N).second) {
5336 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5337 CGF, CGM.getContext().getSizeType(),
5338 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5339 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5340 CGM.getContext().getSizeType(), Loc);
5341 }
5342 RCG.emitAggregateType(CGF, N, Size);
5343 // Remap lhs and rhs variables to the addresses of the function arguments.
5344 // %lhs = bitcast void* %arg0 to <type>*
5345 // %rhs = bitcast void* %arg1 to <type>*
5346 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5347 PrivateScope.addPrivate(
5348 LHSVD,
5349 // Pull out the pointer to the variable.
5350 CGF.EmitLoadOfPointer(
5351 CGF.GetAddrOfLocalVar(&ParamInOut)
5352 .withElementType(CGF.Builder.getPtrTy(0)),
5353 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5354 PrivateScope.addPrivate(
5355 RHSVD,
5356 // Pull out the pointer to the variable.
5357 CGF.EmitLoadOfPointer(
5358 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5359 CGF.Builder.getPtrTy(0)),
5360 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5361 PrivateScope.Privatize();
5362 // Emit the combiner body:
5363 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5364 // store <type> %2, <type>* %lhs
5365 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5366 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5367 cast<DeclRefExpr>(RHS));
5368 CGF.FinishFunction();
5369 return Fn;
5370 }
5372 /// Emits reduction finalizer function:
5373 /// \code
5374 /// void @.red_fini(void* %arg) {
5375 /// %0 = bitcast void* %arg to <type>*
5376 /// <destroy>(<type>* %0)
5377 /// ret void
5378 /// }
5379 /// \endcode
5380 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5381 SourceLocation Loc,
5382 ReductionCodeGen &RCG, unsigned N) {
5383 if (!RCG.needCleanups(N))
5384 return nullptr;
5385 ASTContext &C = CGM.getContext();
5386 FunctionArgList Args;
5387 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5388 ImplicitParamKind::Other);
5389 Args.emplace_back(&Param);
5390 const auto &FnInfo =
5391 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5392 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5393 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5394 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5395 Name, &CGM.getModule());
5396 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5397 Fn->setDoesNotRecurse();
5398 CodeGenFunction CGF(CGM);
5399 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5400 Address PrivateAddr = CGF.EmitLoadOfPointer(
5401 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5402 llvm::Value *Size = nullptr;
5403 // If the size of the reduction item is non-constant, load it from global
5404 // threadprivate variable.
5405 if (RCG.getSizes(N).second) {
5406 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5407 CGF, CGM.getContext().getSizeType(),
5408 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5409 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5410 CGM.getContext().getSizeType(), Loc);
5411 }
5412 RCG.emitAggregateType(CGF, N, Size);
5413 // Emit the finalizer body:
5414 // <destroy>(<type>* %0)
5415 RCG.emitCleanups(CGF, N, PrivateAddr);
5416 CGF.FinishFunction(Loc);
5417 return Fn;
5418 }
5420 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5421 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5422 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5423 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5424 return nullptr;
5426 // Build typedef struct:
5427 // kmp_taskred_input {
5428 // void *reduce_shar; // shared reduction item
5429 // void *reduce_orig; // original reduction item used for initialization
5430 // size_t reduce_size; // size of data item
5431 // void *reduce_init; // data initialization routine
5432 // void *reduce_fini; // data finalization routine
5433 // void *reduce_comb; // data combiner routine
5434 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5435 // } kmp_taskred_input_t;
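// For illustration, '#pragma omp taskgroup task_reduction(+: x)' builds one
// such record for 'x' and passes the array to __kmpc_taskred_init below (or
// to __kmpc_taskred_modifier_init when the reduction clause carries the
// 'task' modifier).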
5436 ASTContext &C = CGM.getContext();
5437 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5438 RD->startDefinition();
5439 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5440 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5441 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5442 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5443 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5444 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5445 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5446 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5447 RD->completeDefinition();
5448 QualType RDType = C.getRecordType(RD);
5449 unsigned Size = Data.ReductionVars.size();
5450 llvm::APInt ArraySize(/*numBits=*/64, Size);
5451 QualType ArrayRDType =
5452 C.getConstantArrayType(RDType, ArraySize, nullptr,
5453 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5454 // kmp_taskred_input_t .rd_input.[Size];
5455 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5456 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5457 Data.ReductionCopies, Data.ReductionOps);
5458 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5459 // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5460 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5461 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5462 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5463 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5464 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5465 ".rd_input.gep.");
5466 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5467 // ElemLVal.reduce_shar = &Shareds[Cnt];
5468 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5469 RCG.emitSharedOrigLValue(CGF, Cnt);
5470 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5471 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5472 // ElemLVal.reduce_orig = &Origs[Cnt];
5473 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5474 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5475 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5476 RCG.emitAggregateType(CGF, Cnt);
5477 llvm::Value *SizeValInChars;
5478 llvm::Value *SizeVal;
5479 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5480 // We use delayed creation/initialization for VLAs and array sections. It is
5481 // required because the runtime does not provide a way to pass the sizes of
5482 // VLAs/array sections to the initializer/combiner/finalizer functions.
5483 // Instead, threadprivate global variables are used to store these values and
5484 // make them available in those functions.
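// E.g. (illustrative) for 'task_reduction(+: vla[0:n])' the byte size depends
// on 'n', so reduce_size is finalized later in emitTaskReductionFixups through
// the "reduction_size" threadprivate variable, and flags is set to 1 below.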
5485 bool DelayedCreation = !!SizeVal;
5486 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5487 /*isSigned=*/false);
5488 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5489 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5490 // ElemLVal.reduce_init = init;
5491 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5492 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5493 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5494 // ElemLVal.reduce_fini = fini;
5495 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5496 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5497 llvm::Value *FiniAddr =
5498 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5499 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5500 // ElemLVal.reduce_comb = comb;
5501 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5502 llvm::Value *CombAddr = emitReduceCombFunction(
5503 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5504 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5505 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5506 // ElemLVal.flags = 0;
5507 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5508 if (DelayedCreation) {
5509 CGF.EmitStoreOfScalar(
5510 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5511 FlagsLVal);
5512 } else
5513 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5514 }
5515 if (Data.IsReductionWithTaskMod) {
5516 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5517 // is_ws, int num, void *data);
5518 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5519 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5520 CGM.IntTy, /*isSigned=*/true);
5521 llvm::Value *Args[] = {
5522 IdentTLoc, GTid,
5523 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5524 /*isSigned=*/true),
5525 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5526 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5527 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5528 return CGF.EmitRuntimeCall(
5529 OMPBuilder.getOrCreateRuntimeFunction(
5530 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5531 Args);
5532 }
5533 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5534 llvm::Value *Args[] = {
5535 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5536 /*isSigned=*/true),
5537 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5538 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5539 CGM.VoidPtrTy)};
5540 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5541 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5542 Args);
5543 }
5545 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5546 SourceLocation Loc,
5547 bool IsWorksharingReduction) {
5548 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
5549 // int is_ws);
5550 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5551 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5552 CGM.IntTy, /*isSigned=*/true);
5553 llvm::Value *Args[] = {IdentTLoc, GTid,
5554 llvm::ConstantInt::get(CGM.IntTy,
5555 IsWorksharingReduction ? 1 : 0,
5556 /*isSigned=*/true)};
5557 (void)CGF.EmitRuntimeCall(
5558 OMPBuilder.getOrCreateRuntimeFunction(
5559 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5560 Args);
5561 }
5563 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5564 SourceLocation Loc,
5565 ReductionCodeGen &RCG,
5566 unsigned N) {
5567 auto Sizes = RCG.getSizes(N);
5568 // Emit threadprivate global variable if the type is non-constant
5569 // (Sizes.second != nullptr).
5570 if (Sizes.second) {
5571 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5572 /*isSigned=*/false);
5573 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5574 CGF, CGM.getContext().getSizeType(),
5575 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5576 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5577 }
5578 }
5580 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5581 SourceLocation Loc,
5582 llvm::Value *ReductionsPtr,
5583 LValue SharedLVal) {
5584 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5585 // *d);
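// For illustration, a task with 'in_reduction(+: x)' calls this with the
// enclosing taskgroup's reduction descriptor; the returned address is then
// used as the task's private copy of 'x'.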
5586 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5587 CGM.IntTy,
5588 /*isSigned=*/true),
5589 ReductionsPtr,
5590 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5591 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5592 return Address(
5593 CGF.EmitRuntimeCall(
5594 OMPBuilder.getOrCreateRuntimeFunction(
5595 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5596 Args),
5597 CGF.Int8Ty, SharedLVal.getAlignment());
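
// Sketch of how the call above is used: inside a participating task, the
// address of the per-thread copy of a reduction item `x` is obtained as
//   %priv = call ptr @__kmpc_task_reduction_get_th_data(i32 %gtid, ptr %tg,
//                                                       ptr %x)
// and updates to `x` in the task body are redirected through %priv.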

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build call void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait) if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
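
// For illustration: a bare `#pragma omp taskwait` lowers to
// __kmpc_omp_taskwait(&loc, gtid), while
//   #pragma omp taskwait depend(in: x)
// first materializes a one-element kmp_depend_info_t array and calls
// __kmpc_omp_taskwait_deps_51 with it instead.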

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
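
// E.g. `#pragma omp cancel for` reaches the runtime as cncl_kind == 2
// (CancelLoop); the values above are expected to mirror libomp's
// kmp_cancel_kind_t enumeration.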

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
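
// Putting Enter/Exit together: for something like
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// the target region body is bracketed by __kmpc_init_allocator and
// __kmpc_destroy_allocator calls for `my_alloc` (sketch; `my_alloc` and
// `my_traits` are user-declared names, not part of this file).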

void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
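
// For illustration: with `ompx_attribute(__launch_bounds__(128, 2))` on the
// directive, AttrMaxThreadsVal becomes 128 and AttrMinBlocksVal becomes 2, so
// the bounds derived from the num_threads/num_teams analysis above are
// tightened to whichever limit is stricter.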

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  llvm::Error Err = OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID);
  assert(!Err && "unexpected error creating target region");

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
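
// For illustration: given
//   #pragma omp target
//   { ; int unused; #pragma omp teams ... }
// the null statement and the unused local are skipped, and the teams
// directive is returned as the single meaningful child of the target body.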

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
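
// For illustration: `#pragma omp target teams num_teams(4)` returns the `4`
// expression with MinTeamsVal = MaxTeamsVal = 4, while a `#pragma omp target`
// whose body contains only plain statements reports -1/-1 and a null
// expression.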

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}

/// Check for a num threads constant value (stored in \p UpperBound) or
/// expression (stored in \p E). If the value is conditional (via an if
/// clause), store the condition in \p CondVal. If \p E or \p CondVal is
/// nullptr, the corresponding expression evaluation is not performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
          *CondVal = CGF.EvaluateExprAsBool(CondExpr);
        }
      }
    }
    // Check the value of the num_threads clause only if the if clause was not
    // specified or did not evaluate to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread-limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
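
// For illustration: for
//   #pragma omp target
//   #pragma omp parallel if(c) num_threads(8)
// this records NTExpr = `8` and *CondVal = `c`, feeding the
// <cond> ? (<numthreads> ? <numthreads> : 0) : 1 computation described above.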

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread-limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    // let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF,
              ThreadLimitClause->getThreadLimit().front()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We have already handled the thread limit
    // expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If both the thread limit and num threads expressions were present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
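
// For illustration: `#pragma omp target parallel num_threads(n) if(c)
// thread_limit(t)` produces roughly
//   %nt  = select i1 %c, i32 %n, i32 1
//   %cmp = icmp ult i32 %t, i32 %nt
//   %res = select i1 %cmp, i32 %t, i32 %nt
// as the thread count handed to the offloading runtime (IR names invented).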

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
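
  // E.g. with OMP_MAP_MEMBER_OF == 0xffff000000000000 (its value at the time
  // of writing; see llvm/Frontend/OpenMP/OMPConstants.h), the loop above
  // counts 48 trailing zero bits, so member positions are encoded in the top
  // 16 bits of the map-type flags.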

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  ///   [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                      HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Information gathered for a single component list of a map-like clause.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base array) - lb * sizeof(element), clamped to zero.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
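
  // For illustration: with `int a[100]`, the section a[10:20] yields
  // 20 * sizeof(int), while a[10:] takes the select path above and yields
  // sizeof(a) - 10 * sizeof(int), clamped to zero.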

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
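
  // E.g. `map(always, to: x)` yields OMP_MAP_TO | OMP_MAP_ALWAYS, plus
  // OMP_MAP_TARGET_PARAM when the entry is itself a kernel argument.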

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = {},
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //     MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    //
    // map(p, p[:100])
    // ===> map(p[:100])
    // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7005 // Track if the map information being generated is the first for a capture.
7006 bool IsCaptureFirstInfo = IsFirstComponentList;
7007 // When the variable is on a declare target link or in a to clause with
7008 // unified memory, a reference is needed to hold the host/device address
7009 // of the variable.
7010 bool RequiresReference = false;
7012 // Scan the components from the base to the complete expression.
7013 auto CI = Components.rbegin();
7014 auto CE = Components.rend();
7015 auto I = CI;
7017 // Track if the map information being generated is the first for a list of
7018 // components.
7019 bool IsExpressionFirstInfo = true;
7020 bool FirstPointerInComplexData = false;
7021 Address BP = Address::invalid();
7022 const Expr *AssocExpr = I->getAssociatedExpression();
7023 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7024 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7025 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7027 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7028 return;
7029 if (isa<MemberExpr>(AssocExpr)) {
7030 // The base is the 'this' pointer. The content of the pointer is going
7031 // to be the base of the field being mapped.
7032 BP = CGF.LoadCXXThisAddress();
7033 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7034 (OASE &&
7035 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7036 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7037 } else if (OAShE &&
7038 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7039 BP = Address(
7040 CGF.EmitScalarExpr(OAShE->getBase()),
7041 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7042 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7043 } else {
7044 // The base is the reference to the variable.
7045 // BP = &Var.
7046 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7047 if (const auto *VD =
7048 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7049 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7050 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7051 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7052 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7053 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7054 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7055 RequiresReference = true;
7056 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7061 // If the variable is a pointer and is being dereferenced (i.e. is not
7062 // the last component), the base has to be the pointer itself, not its
7063 // reference. References are ignored for mapping purposes.
7064 QualType Ty =
7065 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7066 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7067 // No need to generate individual map information for the pointer; it
7068 // can be associated with the combined storage if shared memory mode is
7069 // active or the base declaration is not a global variable.
7070 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7071 if (!AreBothBasePtrAndPteeMapped &&
7072 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7073 !VD || VD->hasLocalStorage()))
7074 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7075 else
7076 FirstPointerInComplexData = true;
7077 ++I;
7081 // Track whether a component of the list should be marked as MEMBER_OF some
7082 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7083 // in a component list should be marked as MEMBER_OF; all subsequent entries
7084 // do not belong to the base struct. E.g.
7085 // struct S2 s;
7086 // s.ps->ps->ps->f[:]
7087 // (1) (2) (3) (4)
7088 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7089 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7090 // is the pointee of ps(2), which is not a member of struct s, so it should not
7091 // be marked as such (it is still PTR_AND_OBJ).
7092 // The variable is initialized to false so that PTR_AND_OBJ entries which
7093 // are not struct members are not considered (e.g. array of pointers to
7094 // data).
7095 bool ShouldBeMemberOf = false;
7097 // Variable keeping track of whether or not we have encountered a component
7098 // in the component list which is a member expression. Useful when we have a
7099 // pointer or a final array section, in which case it is the previous
7100 // component in the list which tells us whether we have a member expression.
7101 // E.g. X.f[:]
7102 // While processing the final array section "[:]" it is "f" which tells us
7103 // whether we are dealing with a member of a declared struct.
7104 const MemberExpr *EncounteredME = nullptr;
7106 // Track the total number of dimensions. Start from one for the dummy
7107 // dimension.
7108 uint64_t DimSize = 1;
7110 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7111 bool IsPrevMemberReference = false;
7113 bool IsPartialMapped =
7114 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7116 // We need to check whether we will encounter any member expressions
7117 // (MEs). If we encounter none, it means we are mapping the whole struct.
7118 // In that case we need to skip adding an entry for the struct to the
7119 // CombinedInfo list and instead add an entry to the
7120 // StructBaseCombinedInfo list, but only when generating all info for clauses.
7121 bool IsMappingWholeStruct = true;
7122 if (!GenerateAllInfoForClauses) {
7123 IsMappingWholeStruct = false;
7124 } else {
7125 for (auto TempI = I; TempI != CE; ++TempI) {
7126 const MemberExpr *PossibleME =
7127 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7128 if (PossibleME) {
7129 IsMappingWholeStruct = false;
7130 break;
7135 for (; I != CE; ++I) {
7136 // If the current component is member of a struct (parent struct) mark it.
7137 if (!EncounteredME) {
7138 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7139 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7140 // as MEMBER_OF the parent struct.
7141 if (EncounteredME) {
7142 ShouldBeMemberOf = true;
7143 // Do not emit as a complex pointer if this is actually not an
7144 // array-like expression.
7145 if (FirstPointerInComplexData) {
7146 QualType Ty = std::prev(I)
7147 ->getAssociatedDeclaration()
7148 ->getType()
7149 .getNonReferenceType();
7150 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7151 FirstPointerInComplexData = false;
7156 auto Next = std::next(I);
7158 // We need to generate the addresses and sizes if this is the last
7159 // component, if the component is a pointer, or if it is an array section
7160 // whose length can't be proved to be one. If this is a pointer, it
7161 // becomes the base address for the following components.
7163 // A final array section is one whose length can't be proved to be one.
7164 // If the map item is non-contiguous then we don't treat any array section
7165 // as a final array section.
7166 bool IsFinalArraySection =
7167 !IsNonContiguous &&
7168 isFinalArraySectionExpression(I->getAssociatedExpression());
7170 // If we have a declaration for the mapping, use that; otherwise use
7171 // the base declaration of the map clause.
7172 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7173 ? I->getAssociatedDeclaration()
7174 : BaseDecl;
7175 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7176 : MapExpr;
7178 // Get information on whether the element is a pointer. We have to give
7179 // array sections special treatment given that they are built-in
7180 // types.
7181 const auto *OASE =
7182 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7183 const auto *OAShE =
7184 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7185 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7186 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7187 bool IsPointer =
7188 OAShE ||
7189 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7190 .getCanonicalType()
7191 ->isAnyPointerType()) ||
7192 I->getAssociatedExpression()->getType()->isAnyPointerType();
7193 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7194 MapDecl &&
7195 MapDecl->getType()->isLValueReferenceType();
7196 bool IsNonDerefPointer = IsPointer &&
7197 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7198 !IsNonContiguous;
7200 if (OASE)
7201 ++DimSize;
7203 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7204 IsFinalArraySection) {
7205 // If this is not the last component, we expect the pointer to be
7206 // associated with an array expression or member expression.
7207 assert((Next == CE ||
7208 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7209 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7210 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7211 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7212 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7213 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7214 "Unexpected expression");
7216 Address LB = Address::invalid();
7217 Address LowestElem = Address::invalid();
7218 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7219 const MemberExpr *E) {
7220 const Expr *BaseExpr = E->getBase();
7221 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7222 // scalar.
7223 LValue BaseLV;
7224 if (E->isArrow()) {
7225 LValueBaseInfo BaseInfo;
7226 TBAAAccessInfo TBAAInfo;
7227 Address Addr =
7228 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7229 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7230 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7231 } else {
7232 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7234 return BaseLV;
7236 if (OAShE) {
7237 LowestElem = LB =
7238 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7239 CGF.ConvertTypeForMem(
7240 OAShE->getBase()->getType()->getPointeeType()),
7241 CGF.getContext().getTypeAlignInChars(
7242 OAShE->getBase()->getType()));
7243 } else if (IsMemberReference) {
7244 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7245 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7246 LowestElem = CGF.EmitLValueForFieldInitialization(
7247 BaseLVal, cast<FieldDecl>(MapDecl))
7248 .getAddress();
7249 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7250 .getAddress();
7251 } else {
7252 LowestElem = LB =
7253 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7254 .getAddress();
7257 // If this component is a pointer inside the base struct then we don't
7258 // need to create any entry for it - it will be combined with the object
7259 // it is pointing to into a single PTR_AND_OBJ entry.
7260 bool IsMemberPointerOrAddr =
7261 EncounteredME &&
7262 (((IsPointer || ForDeviceAddr) &&
7263 I->getAssociatedExpression() == EncounteredME) ||
7264 (IsPrevMemberReference && !IsPointer) ||
7265 (IsMemberReference && Next != CE &&
7266 !Next->getAssociatedExpression()->getType()->isPointerType()));
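// A hypothetical illustration of the overlapped-elements case handled
// below (assuming the usual scenario where a member is mapped separately
// from its enclosing struct):
//   struct T { int a; int b; int c; } t;
//   #pragma omp target map(tofrom: t) map(to: t.b)
// 't.b' would be recorded as an overlapped element, so instead of one
// contiguous copy of 't' we emit bitcopies of the gaps around it,
// sized via the pointer differences computed below.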
7267 if (!OverlappedElements.empty() && Next == CE) {
7268 // Handle base element with the info for overlapped elements.
7269 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7270 assert(!IsPointer &&
7271 "Unexpected base element with the pointer type.");
7272 // Mark the whole struct as the struct that requires allocation on the
7273 // device.
7274 PartialStruct.LowestElem = {0, LowestElem};
7275 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7276 I->getAssociatedExpression()->getType());
7277 Address HB = CGF.Builder.CreateConstGEP(
7278 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7279 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7280 TypeSize.getQuantity() - 1);
7281 PartialStruct.HighestElem = {
7282 std::numeric_limits<decltype(
7283 PartialStruct.HighestElem.first)>::max(),
7284 HB};
7285 PartialStruct.Base = BP;
7286 PartialStruct.LB = LB;
7287 assert(
7288 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7289 "Overlapped elements must be used only once for the variable.");
7290 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7291 // Emit data for non-overlapped data.
7292 OpenMPOffloadMappingFlags Flags =
7293 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7294 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7295 /*AddPtrFlag=*/false,
7296 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7297 llvm::Value *Size = nullptr;
7298 // Do bitcopy of all non-overlapped structure elements.
7299 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7300 Component : OverlappedElements) {
7301 Address ComponentLB = Address::invalid();
7302 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7303 Component) {
7304 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7305 const auto *FD = dyn_cast<FieldDecl>(VD);
7306 if (FD && FD->getType()->isLValueReferenceType()) {
7307 const auto *ME =
7308 cast<MemberExpr>(MC.getAssociatedExpression());
7309 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7310 ComponentLB =
7311 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7312 .getAddress();
7313 } else {
7314 ComponentLB =
7315 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7316 .getAddress();
7318 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7319 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7320 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7321 LBPtr);
7322 break;
7325 assert(Size && "Failed to determine structure size");
7326 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7327 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7328 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7329 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7330 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7331 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7332 Size, CGF.Int64Ty, /*isSigned=*/true));
7333 CombinedInfo.Types.push_back(Flags);
7334 CombinedInfo.Mappers.push_back(nullptr);
7335 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7336 : 1);
7337 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7339 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7340 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7341 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7342 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7343 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7344 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7345 Size = CGF.Builder.CreatePtrDiff(
7346 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7347 LBPtr);
7348 CombinedInfo.Sizes.push_back(
7349 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7350 CombinedInfo.Types.push_back(Flags);
7351 CombinedInfo.Mappers.push_back(nullptr);
7352 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7353 : 1);
7354 break;
7356 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7357 // Skip adding an entry in the CurInfo of this combined entry if the
7358 // whole struct is currently being mapped. The struct needs to be added
7359 // in the first position, before any data internal to the struct is
7360 // mapped.
7361 // Also skip adding an entry if
7362 // PartialStruct.PreliminaryMapData.BasePointers has already been mapped.
7363 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7364 (Next == CE && MapType != OMPC_MAP_unknown)) {
7365 if (!IsMappingWholeStruct) {
7366 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7367 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7368 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7369 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7370 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7371 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7372 Size, CGF.Int64Ty, /*isSigned=*/true));
7373 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7374 : 1);
7375 } else {
7376 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7377 StructBaseCombinedInfo.BasePointers.push_back(
7378 BP.emitRawPointer(CGF));
7379 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7380 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7381 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7382 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7383 Size, CGF.Int64Ty, /*isSigned=*/true));
7384 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7385 IsNonContiguous ? DimSize : 1);
7388 // If Mapper is valid, the last component inherits the mapper.
7389 bool HasMapper = Mapper && Next == CE;
7390 if (!IsMappingWholeStruct)
7391 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7392 else
7393 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7394 : nullptr);
7396 // We need to add a pointer flag for each map that comes from the
7397 // same expression except for the first one. We also need to signal
7398 // that this map is the first one that relates to the current capture
7399 // (there is a set of entries for each capture).
7400 OpenMPOffloadMappingFlags Flags =
7401 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7402 !IsExpressionFirstInfo || RequiresReference ||
7403 FirstPointerInComplexData || IsMemberReference,
7404 AreBothBasePtrAndPteeMapped ||
7405 (IsCaptureFirstInfo && !RequiresReference),
7406 IsNonContiguous);
7408 if (!IsExpressionFirstInfo || IsMemberReference) {
7409 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7410 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7411 if (IsPointer || (IsMemberReference && Next != CE))
7412 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7413 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7414 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7415 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7416 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7418 if (ShouldBeMemberOf) {
7419 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7420 // should be later updated with the correct value of MEMBER_OF.
7421 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7422 // From now on, all subsequent PTR_AND_OBJ entries should not be
7423 // marked as MEMBER_OF.
7424 ShouldBeMemberOf = false;
7428 if (!IsMappingWholeStruct)
7429 CombinedInfo.Types.push_back(Flags);
7430 else
7431 StructBaseCombinedInfo.Types.push_back(Flags);
7434 // If we have encountered a member expression so far, keep track of the
7435 // mapped member. If the parent is "*this", then the value declaration
7436 // is nullptr.
7437 if (EncounteredME) {
7438 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7439 unsigned FieldIndex = FD->getFieldIndex();
7441 // Update info about the lowest and highest elements for this struct
7442 if (!PartialStruct.Base.isValid()) {
7443 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7444 if (IsFinalArraySection) {
7445 Address HB =
7446 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7447 .getAddress();
7448 PartialStruct.HighestElem = {FieldIndex, HB};
7449 } else {
7450 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7452 PartialStruct.Base = BP;
7453 PartialStruct.LB = BP;
7454 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7455 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7456 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7457 if (IsFinalArraySection) {
7458 Address HB =
7459 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7460 .getAddress();
7461 PartialStruct.HighestElem = {FieldIndex, HB};
7462 } else {
7463 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7468 // Need to emit combined struct for array sections.
7469 if (IsFinalArraySection || IsNonContiguous)
7470 PartialStruct.IsArraySection = true;
7472 // If we have a final array section, we are done with this expression.
7473 if (IsFinalArraySection)
7474 break;
7476 // The pointer becomes the base for the next element.
7477 if (Next != CE)
7478 BP = IsMemberReference ? LowestElem : LB;
7479 if (!IsPartialMapped)
7480 IsExpressionFirstInfo = false;
7481 IsCaptureFirstInfo = false;
7482 FirstPointerInComplexData = false;
7483 IsPrevMemberReference = IsMemberReference;
7484 } else if (FirstPointerInComplexData) {
7485 QualType Ty = Components.rbegin()
7486 ->getAssociatedDeclaration()
7487 ->getType()
7488 .getNonReferenceType();
7489 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7490 FirstPointerInComplexData = false;
7493 // If we ran into the whole component, allocate space for the whole
7494 // record.
7495 if (!EncounteredME)
7496 PartialStruct.HasCompleteRecord = true;
7498 if (!IsNonContiguous)
7499 return;
7501 const ASTContext &Context = CGF.getContext();
7503 // To support strides in array sections, we initialize the first
7504 // dimension size to 1, the first offset to 0, and the first count to 1.
7505 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7506 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7507 MapValuesArrayTy CurStrides;
7508 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7509 uint64_t ElementTypeSize;
7511 // Collect size information for each dimension and get the element size as
7512 // the first stride. For example, for `int arr[10][10]`, the DimSizes
7513 // should be [10, 10] and the first stride is 4 bytes.
7514 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7515 Components) {
7516 const Expr *AssocExpr = Component.getAssociatedExpression();
7517 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7519 if (!OASE)
7520 continue;
7522 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7523 auto *CAT = Context.getAsConstantArrayType(Ty);
7524 auto *VAT = Context.getAsVariableArrayType(Ty);
7526 // We need all the dimension sizes except for the last dimension.
7527 assert((VAT || CAT || &Component == &*Components.begin()) &&
7528 "Should be either ConstantArray or VariableArray if not the "
7529 "first Component");
7531 // Get element size if CurStrides is empty.
7532 if (CurStrides.empty()) {
7533 const Type *ElementType = nullptr;
7534 if (CAT)
7535 ElementType = CAT->getElementType().getTypePtr();
7536 else if (VAT)
7537 ElementType = VAT->getElementType().getTypePtr();
7538 else
7539 assert(&Component == &*Components.begin() &&
7540 "Only expect pointer (non CAT or VAT) when this is the "
7541 "first Component");
7542 // If ElementType is null, it means the base is a pointer
7543 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7544 // on the next iteration.
7545 if (ElementType) {
7546 // For the case of having a pointer as the base, we need to remove one
7547 // level of indirection.
7548 if (&Component != &*Components.begin())
7549 ElementType = ElementType->getPointeeOrArrayElementType();
7550 ElementTypeSize =
7551 Context.getTypeSizeInChars(ElementType).getQuantity();
7552 CurStrides.push_back(
7553 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7556 // Get the dimension value for each dimension except the last, since we
7557 // don't need it.
7558 if (DimSizes.size() < Components.size() - 1) {
7559 if (CAT)
7560 DimSizes.push_back(
7561 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7562 else if (VAT)
7563 DimSizes.push_back(CGF.Builder.CreateIntCast(
7564 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7565 /*IsSigned=*/false));
7569 // Skip the dummy dimension since we already have its information.
7570 auto *DI = DimSizes.begin() + 1;
7571 // Running product of dimension sizes.
7572 llvm::Value *DimProd =
7573 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7575 // Collect info for the non-contiguous case. Notice that offset, count,
7576 // and stride are only meaningful for array sections, so we insert a null
7577 // for anything other than an array section.
7578 // Also, the sizes of the offset, count, and stride lists are not the
7579 // same as those of pointers, base_pointers, sizes, or dims. Instead,
7580 // they equal the number of non-contiguous declarations in the
7581 // target update to/from clause.
7582 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7583 Components) {
7584 const Expr *AssocExpr = Component.getAssociatedExpression();
7586 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7587 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7588 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7589 /*isSigned=*/false);
7590 CurOffsets.push_back(Offset);
7591 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7592 CurStrides.push_back(CurStrides.back());
7593 continue;
7596 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7598 if (!OASE)
7599 continue;
7601 // Offset
7602 const Expr *OffsetExpr = OASE->getLowerBound();
7603 llvm::Value *Offset = nullptr;
7604 if (!OffsetExpr) {
7605 // If offset is absent, then we just set it to zero.
7606 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7607 } else {
7608 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7609 CGF.Int64Ty,
7610 /*isSigned=*/false);
7612 CurOffsets.push_back(Offset);
7614 // Count
7615 const Expr *CountExpr = OASE->getLength();
7616 llvm::Value *Count = nullptr;
7617 if (!CountExpr) {
7618 // In Clang, once a higher dimension is an array section, all the lower
7619 // dimensions are constructed as array sections too; however, for a case
7620 // like arr[0:2][2], Clang constructs the inner dimension as an array
7621 // section even though it is not in array-section form according to the spec.
7622 if (!OASE->getColonLocFirst().isValid() &&
7623 !OASE->getColonLocSecond().isValid()) {
7624 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7625 } else {
7626 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7627 // When the length is absent it defaults to ⌈(size −
7628 // lower-bound)/stride⌉, where size is the size of the array
7629 // dimension.
7630 const Expr *StrideExpr = OASE->getStride();
7631 llvm::Value *Stride =
7632 StrideExpr
7633 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7634 CGF.Int64Ty, /*isSigned=*/false)
7635 : nullptr;
7636 if (Stride)
7637 Count = CGF.Builder.CreateUDiv(
7638 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7639 else
7640 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7642 } else {
7643 Count = CGF.EmitScalarExpr(CountExpr);
7645 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7646 CurCounts.push_back(Count);
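// Worked example (hypothetical values): for arr[0::2] over a dimension
// of size 10, the length is absent and a colon is present, so
// Count = (10 - 0) / 2 = 5; for arr[3:4] the explicit length yields
// Count = 4 directly.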
7648 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7649 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7650 // Offset Count Stride
7651 // D0 0 1 4 (int) <- dummy dimension
7652 // D1 0 2 8 (2 * (1) * 4)
7653 // D2 1 2 20 (1 * (1 * 5) * 4)
7654 // D3 0 2 200 (2 * (1 * 5 * 5) * 4)
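// A hypothetical directive that could produce the strides above:
//   int arr[5][5][5];
//   #pragma omp target update to(arr[0:2:2][1:2:1][0:2:2])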
7655 const Expr *StrideExpr = OASE->getStride();
7656 llvm::Value *Stride =
7657 StrideExpr
7658 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7659 CGF.Int64Ty, /*isSigned=*/false)
7660 : nullptr;
7661 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7662 if (Stride)
7663 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7664 else
7665 CurStrides.push_back(DimProd);
7666 if (DI != DimSizes.end())
7667 ++DI;
7670 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7671 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7672 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7675 /// Return the adjusted map modifiers if the declaration a capture refers to
7676 /// appears in a firstprivate clause. This is expected to be used only with
7677 /// directives that start with 'target'.
7678 OpenMPOffloadMappingFlags
7679 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7680 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7682 // A firstprivate variable captured by reference will use only the
7683 // 'private ptr' and 'map to' flags. Return the right flags if the
7684 // captured declaration is known to be firstprivate in this handler.
7685 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7686 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7687 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7688 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7689 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7690 OpenMPOffloadMappingFlags::OMP_MAP_TO;
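// A hypothetical illustration of the two cases above, assuming both
// variables end up captured by reference:
//   int buf[64]; int *p;
//   #pragma omp target firstprivate(buf, p)
// 'buf' (non-pointer) would get PRIVATE | TO, while the pointer 'p'
// would get TO | PTR_AND_OBJ.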
7692 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7693 if (I != LambdasMap.end())
7694 // For map(to: lambda): use the user-specified map type.
7695 return getMapTypeBits(
7696 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7697 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
7698 /*AddPtrFlag=*/false,
7699 /*AddIsTargetParamFlag=*/false,
7700 /*isNonContiguous=*/false);
7701 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7702 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7705 void getPlainLayout(const CXXRecordDecl *RD,
7706 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7707 bool AsBase) const {
7708 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7710 llvm::StructType *St =
7711 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7713 unsigned NumElements = St->getNumElements();
7714 llvm::SmallVector<
7715 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7716 RecordLayout(NumElements);
7718 // Fill bases.
7719 for (const auto &I : RD->bases()) {
7720 if (I.isVirtual())
7721 continue;
7723 QualType BaseTy = I.getType();
7724 const auto *Base = BaseTy->getAsCXXRecordDecl();
7725 // Ignore empty bases.
7726 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7727 CGF.getContext()
7728 .getASTRecordLayout(Base)
7729 .getNonVirtualSize()
7730 .isZero())
7731 continue;
7733 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7734 RecordLayout[FieldIndex] = Base;
7736 // Fill in virtual bases.
7737 for (const auto &I : RD->vbases()) {
7738 QualType BaseTy = I.getType();
7739 // Ignore empty bases.
7740 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7741 continue;
7743 const auto *Base = BaseTy->getAsCXXRecordDecl();
7744 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7745 if (RecordLayout[FieldIndex])
7746 continue;
7747 RecordLayout[FieldIndex] = Base;
7749 // Fill in all the fields.
7750 assert(!RD->isUnion() && "Unexpected union.");
7751 for (const auto *Field : RD->fields()) {
7752 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7753 // will fill in later.)
7754 if (!Field->isBitField() &&
7755 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
7756 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7757 RecordLayout[FieldIndex] = Field;
7760 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7761 &Data : RecordLayout) {
7762 if (Data.isNull())
7763 continue;
7764 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7765 getPlainLayout(Base, Layout, /*AsBase=*/true);
7766 else
7767 Layout.push_back(Data.get<const FieldDecl *>());
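// An illustrative sketch (hypothetical types): for
//   struct A { int a; };
//   struct B : A { int b; int c; };
// getPlainLayout(B, Layout, /*AsBase=*/false) recurses into the
// non-virtual base A first and then appends B's own fields, yielding
// the flattened order [A::a, B::b, B::c].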
7771 /// Generate all the base pointers, section pointers, sizes, map types, and
7772 /// mappers for the extracted mappable expressions (all included in \a
7773 /// CombinedInfo). Also, for each item that relates to a device pointer, a
7774 /// pair of the relevant declaration and index where it occurs is appended to
7775 /// the device pointers info array.
7776 void generateAllInfoForClauses(
7777 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7778 llvm::OpenMPIRBuilder &OMPBuilder,
7779 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7780 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7781 // We have to process the component lists that relate to the same
7782 // declaration in a single chunk so that we can generate the map flags
7783 // correctly. Therefore, we organize all lists in a map.
7784 enum MapKind { Present, Allocs, Other, Total };
7785 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7786 SmallVector<SmallVector<MapInfo, 8>, 4>>
7787 Info;
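// A hypothetical example of the grouping: given
//   map(present, to: x) map(from: x) map(alloc: y)
// both component lists for 'x' land under the same key, with the
// present-modified list stored in the Present bucket so it is processed
// before the plain 'from' list (bucket order: Present, Allocs, Other).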
7789 // Helper function to fill the information map for the different supported
7790 // clauses.
7791 auto &&InfoGen =
7792 [&Info, &SkipVarSet](
7793 const ValueDecl *D, MapKind Kind,
7794 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7795 OpenMPMapClauseKind MapType,
7796 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7797 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7798 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7799 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7800 if (SkipVarSet.contains(D))
7801 return;
7802 auto It = Info.try_emplace(D, Total).first;
7803 It->second[Kind].emplace_back(
7804 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7805 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7808 for (const auto *Cl : Clauses) {
7809 const auto *C = dyn_cast<OMPMapClause>(Cl);
7810 if (!C)
7811 continue;
7812 MapKind Kind = Other;
7813 if (llvm::is_contained(C->getMapTypeModifiers(),
7814 OMPC_MAP_MODIFIER_present))
7815 Kind = Present;
7816 else if (C->getMapType() == OMPC_MAP_alloc)
7817 Kind = Allocs;
7818 const auto *EI = C->getVarRefs().begin();
7819 for (const auto L : C->component_lists()) {
7820 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7821 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7822 C->getMapTypeModifiers(), {},
7823 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7825 ++EI;
7828 for (const auto *Cl : Clauses) {
7829 const auto *C = dyn_cast<OMPToClause>(Cl);
7830 if (!C)
7831 continue;
7832 MapKind Kind = Other;
7833 if (llvm::is_contained(C->getMotionModifiers(),
7834 OMPC_MOTION_MODIFIER_present))
7835 Kind = Present;
7836 const auto *EI = C->getVarRefs().begin();
7837 for (const auto L : C->component_lists()) {
7838 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
7839 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7840 C->isImplicit(), std::get<2>(L), *EI);
7841 ++EI;
7844 for (const auto *Cl : Clauses) {
7845 const auto *C = dyn_cast<OMPFromClause>(Cl);
7846 if (!C)
7847 continue;
7848 MapKind Kind = Other;
7849 if (llvm::is_contained(C->getMotionModifiers(),
7850 OMPC_MOTION_MODIFIER_present))
7851 Kind = Present;
7852 const auto *EI = C->getVarRefs().begin();
7853 for (const auto L : C->component_lists()) {
7854 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
7855 C->getMotionModifiers(),
7856 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7857 *EI);
7858 ++EI;
7862 // Look at the use_device_ptr and use_device_addr clause information and
7863 // mark the existing map entries as such. If there is no map information for
7864 // an entry in the use_device_ptr or use_device_addr list, we create one
7865 // with map type 'alloc' and a zero-size section. It is the user's fault if
7866 // it was not mapped before. If there is no map information and the pointer
7867 // is a struct member, then we defer the emission of that entry until the
7868 // whole struct has been processed.
7869 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7870 SmallVector<DeferredDevicePtrEntryTy, 4>>
7871 DeferredInfo;
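// A hypothetical example of a deferred entry: in
//   #pragma omp target data map(tofrom: s) use_device_ptr(s.p)
// 's.p' is a struct member, so its RETURN_PARAM entry is recorded in
// DeferredInfo and only emitted once the enclosing struct 's' has been
// fully processed.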
7872 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7874 auto &&UseDeviceDataCombinedInfoGen =
7875 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7876 CodeGenFunction &CGF, bool IsDevAddr) {
7877 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7878 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7879 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7880 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7881 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7882 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7883 UseDeviceDataCombinedInfo.Sizes.push_back(
7884 llvm::Constant::getNullValue(CGF.Int64Ty));
7885 UseDeviceDataCombinedInfo.Types.push_back(
7886 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7887 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7890 auto &&MapInfoGen =
7891 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7892 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7893 OMPClauseMappableExprCommon::MappableExprComponentListRef
7894 Components,
7895 bool IsImplicit, bool IsDevAddr) {
7896 // We didn't find any match in our map information, so generate a
7897 // zero-size array section; if the pointer is a struct member, we defer
7898 // this action until the whole struct has been processed.
7899 if (isa<MemberExpr>(IE)) {
7900 // Insert the pointer into Info to be processed by
7901 // generateInfoForComponentList. Because it is a member pointer
7902 // without a pointee, no entry will be generated for it, therefore
7903 // we need to generate one after the whole struct has been
7904 // processed. Nonetheless, generateInfoForComponentList must be
7905 // called to take the pointer into account for the calculation of
7906 // the range of the partial struct.
7907 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
7908 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
7909 IsDevAddr);
7910 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7911 } else {
7912 llvm::Value *Ptr;
7913 if (IsDevAddr) {
7914 if (IE->isGLValue())
7915 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7916 else
7917 Ptr = CGF.EmitScalarExpr(IE);
7918 } else {
7919 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7921 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7925 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7926 const Expr *IE, bool IsDevAddr) -> bool {
7927 // We potentially have map information for this declaration already.
7928 // Look for the first set of components that refer to it. If found,
7929 // return true.
7930 // If the first component is a member expression, we have to look into
7931 // 'this', which maps to null in the map of map information. Otherwise
7932 // look directly for the information.
7933 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7934 if (It != Info.end()) {
7935 bool Found = false;
7936 for (auto &Data : It->second) {
7937 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7938 return MI.Components.back().getAssociatedDeclaration() == VD;
7940 // If we found a map entry, signal that the pointer has to be
7941 // returned and move on to the next declaration. Exclude cases where
7942 // the base pointer is mapped as an array subscript, array section, or
7943 // array shaping expression. The base address is passed as a pointer to
7944 // the base in this case and cannot be used as a base for a
7945 // use_device_ptr list item.
7946 if (CI != Data.end()) {
7947 if (IsDevAddr) {
7948 CI->ForDeviceAddr = IsDevAddr;
7949 CI->ReturnDevicePointer = true;
7950 Found = true;
7951 break;
7952 } else {
7953 auto PrevCI = std::next(CI->Components.rbegin());
7954 const auto *VarD = dyn_cast<VarDecl>(VD);
7955 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7956 isa<MemberExpr>(IE) ||
7957 !VD->getType().getNonReferenceType()->isPointerType() ||
7958 PrevCI == CI->Components.rend() ||
7959 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7960 VarD->hasLocalStorage()) {
7961 CI->ForDeviceAddr = IsDevAddr;
7962 CI->ReturnDevicePointer = true;
7963 Found = true;
7964 break;
7969 return Found;
7971 return false;
7974 // Look at the use_device_ptr clause information and mark the existing map
7975 // entries as such. If there is no map information for an entry in the
7976 // use_device_ptr list, we create one with map type 'alloc' and a zero-size
7977 // section. It is the user's fault if it was not mapped before. If there is
7978 // no map information and the pointer is a struct member, then we defer the
7979 // emission of that entry until the whole struct has been processed.
7980 for (const auto *Cl : Clauses) {
7981 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7982 if (!C)
7983 continue;
7984 for (const auto L : C->component_lists()) {
7985 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7986 std::get<1>(L);
7987 assert(!Components.empty() &&
7988 "Not expecting empty list of components!");
7989 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
7990 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7991 const Expr *IE = Components.back().getAssociatedExpression();
7992 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
7993 continue;
7994 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
7995 /*IsDevAddr=*/false);
7999 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8000 for (const auto *Cl : Clauses) {
8001 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8002 if (!C)
8003 continue;
8004 for (const auto L : C->component_lists()) {
8005 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8006 std::get<1>(L);
8007 assert(!std::get<1>(L).empty() &&
8008 "Not expecting empty list of components!");
8009 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8010 if (!Processed.insert(VD).second)
8011 continue;
8012 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8013 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8014 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8015 continue;
8016 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8017 /*IsDevAddr=*/true);
8021 for (const auto &Data : Info) {
8022 StructRangeInfoTy PartialStruct;
8023 // Current struct information:
8024 MapCombinedInfoTy CurInfo;
8025 // Current struct base information:
8026 MapCombinedInfoTy StructBaseCurInfo;
8027 const Decl *D = Data.first;
8028 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8029 bool HasMapBasePtr = false;
8030 bool HasMapArraySec = false;
8031 if (VD && VD->getType()->isAnyPointerType()) {
8032 for (const auto &M : Data.second) {
8033 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8034 return isa_and_present<DeclRefExpr>(L.VarRef);
8036 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8037 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8038 L.VarRef);
8040 if (HasMapBasePtr && HasMapArraySec)
8041 break;
8044 for (const auto &M : Data.second) {
8045 for (const MapInfo &L : M) {
8046 assert(!L.Components.empty() &&
8047 "Not expecting declaration with no component lists.");
8049 // Remember the current base pointer index.
8050 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8051 unsigned StructBasePointersIdx =
8052 StructBaseCurInfo.BasePointers.size();
8053 CurInfo.NonContigInfo.IsNonContiguous =
8054 L.Components.back().isNonContiguous();
8055 generateInfoForComponentList(
8056 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8057 CurInfo, StructBaseCurInfo, PartialStruct,
8058 /*IsFirstComponentList=*/false, L.IsImplicit,
8059 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8060 L.VarRef, /*OverlappedElements*/ {},
8061 HasMapBasePtr && HasMapArraySec);
8063 // If this entry relates to a device pointer, set the relevant
8064 // declaration and add the 'return pointer' flag.
8065 if (L.ReturnDevicePointer) {
8066 // Check whether a value was added to either CurInfo or
8067 // StructBaseCurInfo and error if no value was added to either of
8068 // them:
8069 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8070 StructBasePointersIdx <
8071 StructBaseCurInfo.BasePointers.size()) &&
8072 "Unexpected number of mapped base pointers.");
8074 // Choose a base pointer index which is always valid:
8075 const ValueDecl *RelevantVD =
8076 L.Components.back().getAssociatedDeclaration();
8077 assert(RelevantVD &&
8078 "No relevant declaration related with device pointer??");
8080 // If StructBaseCurInfo has been updated this iteration, then work on
8081 // the first new entry added to it, i.e. make sure that when multiple
8082 // values are added to any of the lists, the first value added is the
8083 // one being modified by the assignments below (not the last value
8084 // added).
8085 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8086 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8087 RelevantVD;
8088 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8089 L.ForDeviceAddr ? DeviceInfoTy::Address
8090 : DeviceInfoTy::Pointer;
8091 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8092 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8093 } else {
8094 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8095 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8096 L.ForDeviceAddr ? DeviceInfoTy::Address
8097 : DeviceInfoTy::Pointer;
8098 CurInfo.Types[CurrentBasePointersIdx] |=
8099 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8105 // Append any pending zero-length pointers which are struct members and
8106 // used with use_device_ptr or use_device_addr.
8107 auto CI = DeferredInfo.find(Data.first);
8108 if (CI != DeferredInfo.end()) {
8109 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8110 llvm::Value *BasePtr;
8111 llvm::Value *Ptr;
8112 if (L.ForDeviceAddr) {
8113 if (L.IE->isGLValue())
8114 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8115 else
8116 Ptr = this->CGF.EmitScalarExpr(L.IE);
8117 BasePtr = Ptr;
8118 // Entry is RETURN_PARAM. Also, set the placeholder value
8119 // MEMBER_OF=FFFF so that the entry is later updated with the
8120 // correct value of MEMBER_OF.
8121 CurInfo.Types.push_back(
8122 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8123 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8124 } else {
8125 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8126 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8127 L.IE->getExprLoc());
8128 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8129 // placeholder value MEMBER_OF=FFFF so that the entry is later
8130 // updated with the correct value of MEMBER_OF.
8131 CurInfo.Types.push_back(
8132 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8133 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8134 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8136 CurInfo.Exprs.push_back(L.VD);
8137 CurInfo.BasePointers.emplace_back(BasePtr);
8138 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8139 CurInfo.DevicePointers.emplace_back(
8140 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8141 CurInfo.Pointers.push_back(Ptr);
8142 CurInfo.Sizes.push_back(
8143 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8144 CurInfo.Mappers.push_back(nullptr);
8148 // Unify entries in one list making sure the struct mapping precedes the
8149 // individual fields:
8150 MapCombinedInfoTy UnionCurInfo;
8151 UnionCurInfo.append(StructBaseCurInfo);
8152 UnionCurInfo.append(CurInfo);
8154 // If there is an entry in PartialStruct it means we have a struct with
8155 // individual members mapped. Emit an extra combined entry.
8156 if (PartialStruct.Base.isValid()) {
8157 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8158 // Emit a combined entry:
8159 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8160 /*IsMapThis*/ !VD, OMPBuilder, VD);
8163 // We need to append the results of this capture to what we already have.
8164 CombinedInfo.append(UnionCurInfo);
8166 // Append data for use_device_ptr clauses.
8167 CombinedInfo.append(UseDeviceDataCombinedInfo);
8170 public:
8171 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8172 : CurDir(&Dir), CGF(CGF) {
8173 // Extract firstprivate clause information.
8174 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8175 for (const auto *D : C->varlist())
8176 FirstPrivateDecls.try_emplace(
8177 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8178 // Extract implicit firstprivates from uses_allocators clauses.
8179 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8180 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8181 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8182 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8183 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8184 /*Implicit=*/true);
8185 else if (const auto *VD = dyn_cast<VarDecl>(
8186 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8187 ->getDecl()))
8188 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8191 // Extract device pointer clause information.
8192 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8193 for (auto L : C->component_lists())
8194 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8195 // Extract device addr clause information.
8196 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8197 for (auto L : C->component_lists())
8198 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8199 // Extract map information.
8200 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8201 if (C->getMapType() != OMPC_MAP_to)
8202 continue;
8203 for (auto L : C->component_lists()) {
8204 const ValueDecl *VD = std::get<0>(L);
8205 const auto *RD = VD ? VD->getType()
8206 .getCanonicalType()
8207 .getNonReferenceType()
8208 ->getAsCXXRecordDecl()
8209 : nullptr;
8210 if (RD && RD->isLambda())
8211 LambdasMap.try_emplace(std::get<0>(L), C);
8216 /// Constructor for the declare mapper directive.
8217 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8218 : CurDir(&Dir), CGF(CGF) {}
8220 /// Generate code for the combined entry if we have a partially mapped struct
8221 /// and take care of the mapping flags of the arguments corresponding to
8222 /// individual struct members.
8223 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8224 MapFlagsArrayTy &CurTypes,
8225 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8226 llvm::OpenMPIRBuilder &OMPBuilder,
8227 const ValueDecl *VD = nullptr,
8228 bool NotTargetParams = true) const {
8229 if (CurTypes.size() == 1 &&
8230 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8231 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8232 !PartialStruct.IsArraySection)
8233 return;
8234 Address LBAddr = PartialStruct.LowestElem.second;
8235 Address HBAddr = PartialStruct.HighestElem.second;
8236 if (PartialStruct.HasCompleteRecord) {
8237 LBAddr = PartialStruct.LB;
8238 HBAddr = PartialStruct.LB;
8240 CombinedInfo.Exprs.push_back(VD);
8241 // Base is the base of the struct
8242 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8243 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8244 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8245 // Pointer is the address of the lowest element
8246 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8247 const CXXMethodDecl *MD =
8248 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8249 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8250 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8251 // There should not be a mapper for a combined entry.
8252 if (HasBaseClass) {
8253 // OpenMP 5.2 148:21:
8254 // If the target construct is within a class non-static member function,
8255 // and a variable is an accessible data member of the object for which the
8256 // non-static data member function is invoked, the variable is treated as
8257 // if the this[:1] expression had appeared in a map clause with a map-type
8258 // of tofrom.
8259 // Emit this[:1]
8260 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8261 QualType Ty = MD->getFunctionObjectParameterType();
8262 llvm::Value *Size =
8263 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8264 /*isSigned=*/true);
8265 CombinedInfo.Sizes.push_back(Size);
8266 } else {
8267 CombinedInfo.Pointers.push_back(LB);
8268 // Size is (addr of {highest+1} element) - (addr of lowest element)
8269 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8270 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8271 HBAddr.getElementType(), HB, /*Idx0=*/1);
8272 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8273 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8274 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8275 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8276 /*isSigned=*/false);
8277 CombinedInfo.Sizes.push_back(Size);
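// Worked example (hypothetical layout): if the lowest mapped member sits
// at offset 8 and the highest is an 8-byte member at offset 24, then
// HAddr points at offset 24 + 8 = 32 and Size = 32 - 8 = 24 bytes.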
8279 CombinedInfo.Mappers.push_back(nullptr);
8280 // Map type is always TARGET_PARAM when generating info for captures.
8281 CombinedInfo.Types.push_back(
8282 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8283 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8284 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8285 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8286 // If any element has the present modifier, then make sure the runtime
8287 // doesn't attempt to allocate the struct.
8288 if (CurTypes.end() !=
8289 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8290 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8291 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8293 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8294 // Remove TARGET_PARAM flag from the first element
8295 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8296 // If any element has the ompx_hold modifier, then make sure the runtime
8297 // uses the hold reference count for the struct as a whole so that it won't
8298 // be unmapped by an extra dynamic reference count decrement. Add it to all
8299 // elements as well so the runtime knows which reference count to check
8300 // when determining whether it's time for device-to-host transfers of
8301 // individual elements.
8302 if (CurTypes.end() !=
8303 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8304 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8305 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8306 })) {
8307 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8308 for (auto &M : CurTypes)
8309 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8312 // All other current entries will be MEMBER_OF the combined entry
8313 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8314 // 0xFFFF in the MEMBER_OF field).
8315 OpenMPOffloadMappingFlags MemberOfFlag =
8316 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8317 for (auto &M : CurTypes)
8318 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8321 /// Generate all the base pointers, section pointers, sizes, map types, and
8322 /// mappers for the extracted mappable expressions (all included in \a
8323 /// CombinedInfo). Also, for each item that relates to a device pointer, a
8324 /// pair of the relevant declaration and index where it occurs is appended to
8325 /// the device pointers info array.
8326 void generateAllInfo(
8327 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8328 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8329 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8330 assert(CurDir.is<const OMPExecutableDirective *>() &&
8331 "Expect a executable directive");
8332 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8333 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8334 SkipVarSet);
8337 /// Generate all the base pointers, section pointers, sizes, map types, and
8338 /// mappers for the extracted map clauses of a user-defined mapper (all
8339 /// included in \a CombinedInfo).
8340 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8341 llvm::OpenMPIRBuilder &OMPBuilder) const {
8342 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8343 "Expect a declare mapper directive");
8344 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8345 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8346 OMPBuilder);
8349 /// Emit capture info for lambdas for variables captured by reference.
8350 void generateInfoForLambdaCaptures(
8351 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8352 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8353 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8354 const auto *RD = VDType->getAsCXXRecordDecl();
8355 if (!RD || !RD->isLambda())
8356 return;
8357 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8358 CGF.getContext().getDeclAlign(VD));
8359 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8360 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8361 FieldDecl *ThisCapture = nullptr;
8362 RD->getCaptureFields(Captures, ThisCapture);
8363 if (ThisCapture) {
8364 LValue ThisLVal =
8365 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8366 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8367 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8368 VDLVal.getPointer(CGF));
8369 CombinedInfo.Exprs.push_back(VD);
8370 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8371 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8372 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8373 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8374 CombinedInfo.Sizes.push_back(
8375 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8376 CGF.Int64Ty, /*isSigned=*/true));
8377 CombinedInfo.Types.push_back(
8378 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8379 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8380 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8381 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8382 CombinedInfo.Mappers.push_back(nullptr);
8384 for (const LambdaCapture &LC : RD->captures()) {
8385 if (!LC.capturesVariable())
8386 continue;
8387 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8388 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8389 continue;
8390 auto It = Captures.find(VD);
8391 assert(It != Captures.end() && "Found lambda capture without field.");
8392 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8393 if (LC.getCaptureKind() == LCK_ByRef) {
8394 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8395 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8396 VDLVal.getPointer(CGF));
8397 CombinedInfo.Exprs.push_back(VD);
8398 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8399 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8400 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8401 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8402 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8403 CGF.getTypeSize(
8404 VD->getType().getCanonicalType().getNonReferenceType()),
8405 CGF.Int64Ty, /*isSigned=*/true));
8406 } else {
8407 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8408 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8409 VDLVal.getPointer(CGF));
8410 CombinedInfo.Exprs.push_back(VD);
8411 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8412 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8413 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8414 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8415 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8417 CombinedInfo.Types.push_back(
8418 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8419 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8420 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8421 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8422 CombinedInfo.Mappers.push_back(nullptr);
8426 /// Set correct indices for lambda captures.
8427 void adjustMemberOfForLambdaCaptures(
8428 llvm::OpenMPIRBuilder &OMPBuilder,
8429 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8430 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8431 MapFlagsArrayTy &Types) const {
8432 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8433 // Set correct member_of idx for all implicit lambda captures.
8434 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8435 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8436 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8437 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8438 continue;
8439 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8440 assert(BasePtr && "Unable to find base lambda address.");
8441 int TgtIdx = -1;
8442 for (unsigned J = I; J > 0; --J) {
8443 unsigned Idx = J - 1;
8444 if (Pointers[Idx] != BasePtr)
8445 continue;
8446 TgtIdx = Idx;
8447 break;
8449 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8450       // All other current entries will be MEMBER_OF the combined entry
8451       // (except for PTR_AND_OBJ entries whose MEMBER_OF field does not hold
8452       // the 0xFFFF placeholder value).
8453 OpenMPOffloadMappingFlags MemberOfFlag =
8454 OMPBuilder.getMemberOfFlag(TgtIdx);
8455 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8459 /// Generate the base pointers, section pointers, sizes, map types, and
8460 /// mappers associated to a given capture (all included in \a CombinedInfo).
8461 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8462 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8463 StructRangeInfoTy &PartialStruct) const {
8464 assert(!Cap->capturesVariableArrayType() &&
8465 "Not expecting to generate map info for a variable array type!");
8467     // We need to know when we are generating information for the first component.
8468 const ValueDecl *VD = Cap->capturesThis()
8469 ? nullptr
8470 : Cap->getCapturedVar()->getCanonicalDecl();
8472     // For map(to: lambda): skip it here; it is processed in
8473     // generateDefaultMapInfo.
8474 if (LambdasMap.count(VD))
8475 return;
8477     // If this declaration appears in an is_device_ptr clause, we just have to
8478     // pass the pointer by value. If it is a reference to a declaration, we just
8479     // pass its value.
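    // For example (illustrative): with '#pragma omp target is_device_ptr(p)',
    // the device pointer 'p' is forwarded to the kernel argument as-is via a
    // LITERAL | TARGET_PARAM entry, and no separate data mapping is created.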
8480 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8481 CombinedInfo.Exprs.push_back(VD);
8482 CombinedInfo.BasePointers.emplace_back(Arg);
8483 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8484 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8485 CombinedInfo.Pointers.push_back(Arg);
8486 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8487 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8488 /*isSigned=*/true));
8489 CombinedInfo.Types.push_back(
8490 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8491 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8492 CombinedInfo.Mappers.push_back(nullptr);
8493 return;
8496 using MapData =
8497 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8498 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8499 const ValueDecl *, const Expr *>;
8500 SmallVector<MapData, 4> DeclComponentLists;
8501     // For member fields listed in is_device_ptr, store them in
8502     // DeclComponentLists to generate the components info.
8503 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8504 auto It = DevPointersMap.find(VD);
8505 if (It != DevPointersMap.end())
8506 for (const auto &MCL : It->second)
8507 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8508                                  /*IsImplicit=*/true, nullptr,
8509 nullptr);
8510 auto I = HasDevAddrsMap.find(VD);
8511 if (I != HasDevAddrsMap.end())
8512 for (const auto &MCL : I->second)
8513 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8514                                  /*IsImplicit=*/true, nullptr,
8515 nullptr);
8516     assert(CurDir.is<const OMPExecutableDirective *>() &&
8517            "Expect an executable directive");
8518 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8519 bool HasMapBasePtr = false;
8520 bool HasMapArraySec = false;
8521 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8522 const auto *EI = C->getVarRefs().begin();
8523 for (const auto L : C->decl_component_lists(VD)) {
8524 const ValueDecl *VDecl, *Mapper;
8526         // The expression is not valid if the mapping is implicit.
8526 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8527 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8528 std::tie(VDecl, Components, Mapper) = L;
8529 assert(VDecl == VD && "We got information for the wrong declaration??");
8530 assert(!Components.empty() &&
8531 "Not expecting declaration with no component lists.");
8532 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8533 HasMapBasePtr = true;
8534 if (VD && E && VD->getType()->isAnyPointerType() &&
8535 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8536 HasMapArraySec = true;
8537 DeclComponentLists.emplace_back(Components, C->getMapType(),
8538 C->getMapTypeModifiers(),
8539 C->isImplicit(), Mapper, E);
8540 ++EI;
8543 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8544 const MapData &RHS) {
8545 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8546 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8547 bool HasPresent =
8548 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8549 bool HasAllocs = MapType == OMPC_MAP_alloc;
8550 MapModifiers = std::get<2>(RHS);
8551 MapType = std::get<1>(LHS);
8552 bool HasPresentR =
8553 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8554 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8555       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8556     });
8558 // Find overlapping elements (including the offset from the base element).
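    // For example (illustrative): given 'map(s) map(s.f)', walking both
    // component lists back from the base exhausts the list of 's' while
    // matching the head of the list of 's.f', so 's.f' is recorded in
    // OverlappedData as a sub-component overlapping the mapping of 's'.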
8559 llvm::SmallDenseMap<
8560 const MapData *,
8561 llvm::SmallVector<
8562 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8563             4>
8564         OverlappedData;
8565 size_t Count = 0;
8566 for (const MapData &L : DeclComponentLists) {
8567 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8568 OpenMPMapClauseKind MapType;
8569 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8570 bool IsImplicit;
8571 const ValueDecl *Mapper;
8572 const Expr *VarRef;
8573       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8574           L;
8575 ++Count;
8576 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8577 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8578 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8579 VarRef) = L1;
8580 auto CI = Components.rbegin();
8581 auto CE = Components.rend();
8582 auto SI = Components1.rbegin();
8583 auto SE = Components1.rend();
8584 for (; CI != CE && SI != SE; ++CI, ++SI) {
8585 if (CI->getAssociatedExpression()->getStmtClass() !=
8586 SI->getAssociatedExpression()->getStmtClass())
8587 break;
8588 // Are we dealing with different variables/fields?
8589 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8590 break;
8592         // We found an overlap if, for at least one of the two component
8593         // lists, we reached the head of the list.
8594 if (CI == CE || SI == SE) {
8595 // Ignore it if it is the same component.
8596 if (CI == CE && SI == SE)
8597 continue;
8598 const auto It = (SI == SE) ? CI : SI;
8599           // If one component is a pointer and the other is a kind of
8600           // dereference of this pointer (array subscript, section, dereference,
8601           // etc.), it is not an overlap.
8602           // The same holds if one component is a base and the other component
8603           // is a dereferenced pointer MemberExpr with the same base.
8604 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8605 (std::prev(It)->getAssociatedDeclaration() &&
8606 std::prev(It)
8607 ->getAssociatedDeclaration()
8608 ->getType()
8609 ->isPointerType()) ||
8610 (It->getAssociatedDeclaration() &&
8611 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8612 std::next(It) != CE && std::next(It) != SE))
8613 continue;
8614 const MapData &BaseData = CI == CE ? L : L1;
8615 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8616 SI == SE ? Components : Components1;
8617 OverlappedData[&BaseData].push_back(SubData);
8621 // Sort the overlapped elements for each item.
8622 llvm::SmallVector<const FieldDecl *, 4> Layout;
8623 if (!OverlappedData.empty()) {
8624 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8625 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8626 while (BaseType != OrigType) {
8627 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8628 OrigType = BaseType->getPointeeOrArrayElementType();
8631 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8632 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8633 else {
8634 const auto *RD = BaseType->getAsRecordDecl();
8635 Layout.append(RD->field_begin(), RD->field_end());
8638 for (auto &Pair : OverlappedData) {
8639 llvm::stable_sort(
8640 Pair.getSecond(),
8641 [&Layout](
8642 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8643 OMPClauseMappableExprCommon::MappableExprComponentListRef
8644 Second) {
8645 auto CI = First.rbegin();
8646 auto CE = First.rend();
8647 auto SI = Second.rbegin();
8648 auto SE = Second.rend();
8649 for (; CI != CE && SI != SE; ++CI, ++SI) {
8650 if (CI->getAssociatedExpression()->getStmtClass() !=
8651 SI->getAssociatedExpression()->getStmtClass())
8652 break;
8653 // Are we dealing with different variables/fields?
8654 if (CI->getAssociatedDeclaration() !=
8655 SI->getAssociatedDeclaration())
8656 break;
8659 // Lists contain the same elements.
8660 if (CI == CE && SI == SE)
8661 return false;
8663               // A list with fewer elements is less than a list with more elements.
8664 if (CI == CE || SI == SE)
8665 return CI == CE;
8667 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8668 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8669 if (FD1->getParent() == FD2->getParent())
8670 return FD1->getFieldIndex() < FD2->getFieldIndex();
8671 const auto *It =
8672 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8673                     return FD == FD1 || FD == FD2;
8674                   });
8675               return *It == FD1;
8676             });
8679     // The mapping flags depend on whether this is the first component list
8680     // associated with a capture. Go through the overlapped elements first.
8681 bool IsFirstComponentList = true;
8682 MapCombinedInfoTy StructBaseCombinedInfo;
8683 for (const auto &Pair : OverlappedData) {
8684 const MapData &L = *Pair.getFirst();
8685 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8686 OpenMPMapClauseKind MapType;
8687 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8688 bool IsImplicit;
8689 const ValueDecl *Mapper;
8690 const Expr *VarRef;
8691       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8692           L;
8693 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8694 OverlappedComponents = Pair.getSecond();
8695 generateInfoForComponentList(
8696 MapType, MapModifiers, {}, Components, CombinedInfo,
8697 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8698 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8699 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8700 IsFirstComponentList = false;
8702 // Go through other elements without overlapped elements.
8703 for (const MapData &L : DeclComponentLists) {
8704 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8705 OpenMPMapClauseKind MapType;
8706 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8707 bool IsImplicit;
8708 const ValueDecl *Mapper;
8709 const Expr *VarRef;
8710       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8711           L;
8712 auto It = OverlappedData.find(&L);
8713 if (It == OverlappedData.end())
8714 generateInfoForComponentList(
8715 MapType, MapModifiers, {}, Components, CombinedInfo,
8716 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8717 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8718 /*ForDeviceAddr=*/false, VD, VarRef,
8719 /*OverlappedElements*/ {}, HasMapBasePtr && HasMapArraySec);
8720 IsFirstComponentList = false;
8724 /// Generate the default map information for a given capture \a CI,
8725 /// record field declaration \a RI and captured value \a CV.
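  /// For illustration (not normative), given:
  /// \code
  /// int n; S s;
  /// #pragma omp target   // no explicit map clauses
  ///   use(n, s);         // 'use' is a placeholder
  /// \endcode
  /// the scalar 'n' captured by copy is passed as an OMP_MAP_LITERAL argument,
  /// while the aggregate 's' captured by reference defaults to 'tofrom'. Every
  /// default map entry also gets TARGET_PARAM and, if implicit, IMPLICIT.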
8726 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8727 const FieldDecl &RI, llvm::Value *CV,
8728 MapCombinedInfoTy &CombinedInfo) const {
8729 bool IsImplicit = true;
8730 // Do the default mapping.
8731 if (CI.capturesThis()) {
8732 CombinedInfo.Exprs.push_back(nullptr);
8733 CombinedInfo.BasePointers.push_back(CV);
8734 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8735 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8736 CombinedInfo.Pointers.push_back(CV);
8737 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8738 CombinedInfo.Sizes.push_back(
8739 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8740 CGF.Int64Ty, /*isSigned=*/true));
8741 // Default map type.
8742 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8743 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8744 } else if (CI.capturesVariableByCopy()) {
8745 const VarDecl *VD = CI.getCapturedVar();
8746 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8747 CombinedInfo.BasePointers.push_back(CV);
8748 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8749 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8750 CombinedInfo.Pointers.push_back(CV);
8751 if (!RI.getType()->isAnyPointerType()) {
8752         // We have to signal to the runtime which captures are passed by value
8753         // and are not pointers.
8754 CombinedInfo.Types.push_back(
8755 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8756 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8757 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8758 } else {
8759         // Pointers are implicitly mapped with a zero size and no flags
8760         // (other than the flags added below for all implicit maps).
8761 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8762 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8764 auto I = FirstPrivateDecls.find(VD);
8765 if (I != FirstPrivateDecls.end())
8766 IsImplicit = I->getSecond();
8767 } else {
8768 assert(CI.capturesVariable() && "Expected captured reference.");
8769 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8770 QualType ElementType = PtrTy->getPointeeType();
8771 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8772 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8773 // The default map type for a scalar/complex type is 'to' because by
8774 // default the value doesn't have to be retrieved. For an aggregate
8775 // type, the default is 'tofrom'.
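      // For instance (illustrative): a captured 'double &d' defaults to 'to',
      // while a captured 'struct S &s' defaults to 'tofrom'.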
8776 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8777 const VarDecl *VD = CI.getCapturedVar();
8778 auto I = FirstPrivateDecls.find(VD);
8779 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8780 CombinedInfo.BasePointers.push_back(CV);
8781 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8782 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8783 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8784 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8785 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8786 AlignmentSource::Decl));
8787 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8788 } else {
8789 CombinedInfo.Pointers.push_back(CV);
8791 if (I != FirstPrivateDecls.end())
8792 IsImplicit = I->getSecond();
8794 // Every default map produces a single argument which is a target parameter.
8795 CombinedInfo.Types.back() |=
8796 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8798 // Add flag stating this is an implicit map.
8799 if (IsImplicit)
8800 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8802 // No user-defined mapper for default mapping.
8803 CombinedInfo.Mappers.push_back(nullptr);
8806 } // anonymous namespace
8808 // Try to extract the base declaration from a `this->x` expression if possible.
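// For instance (illustrative): for 'this->a[0:n]' this returns the
// declaration of the member 'a'; for any other form it returns nullptr.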
8809 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8810 if (!E)
8811 return nullptr;
8813 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8814 if (const MemberExpr *ME =
8815 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8816 return ME->getMemberDecl();
8817 return nullptr;
8820 /// Emit a string constant containing the names of the values mapped to the
8821 /// offloading runtime library.
8822 static llvm::Constant *
8823 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8824 MappableExprsHandler::MappingExprInfo &MapExprs) {
8826 uint32_t SrcLocStrSize;
8827 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8828 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8830 SourceLocation Loc;
8831 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8832 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8833 Loc = VD->getLocation();
8834 else
8835 Loc = MapExprs.getMapExpr()->getExprLoc();
8836 } else {
8837 Loc = MapExprs.getMapDecl()->getLocation();
8840 std::string ExprName;
8841 if (MapExprs.getMapExpr()) {
8842 PrintingPolicy P(CGF.getContext().getLangOpts());
8843 llvm::raw_string_ostream OS(ExprName);
8844 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8845 } else {
8846 ExprName = MapExprs.getMapDecl()->getNameAsString();
8849 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8850 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8851 PLoc.getLine(), PLoc.getColumn(),
8852 SrcLocStrSize);
8854 /// Emit the arrays used to pass the captures and map information to the
8855 /// offloading runtime library. If there is no map or capture information,
8856 /// return nullptr by reference.
8857 static void emitOffloadingArraysAndArgs(
8858 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8859 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8860 bool IsNonContiguous = false, bool ForEndCall = false) {
8861 CodeGenModule &CGM = CGF.CGM;
8863 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8864 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8865 CGF.AllocaInsertPt->getIterator());
8866 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8867 CGF.Builder.GetInsertPoint());
8869 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8870 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8871       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8872     }
8873   };
8875 auto CustomMapperCB = [&](unsigned int I) {
8876 llvm::Value *MFunc = nullptr;
8877 if (CombinedInfo.Mappers[I]) {
8878 Info.HasMapper = true;
8879 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8880           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8881     }
8882     return MFunc;
8883   };
8884 OMPBuilder.emitOffloadingArraysAndArgs(
8885 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
8886 ForEndCall, DeviceAddrCB, CustomMapperCB);
8889 /// Check for inner distribute directive.
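/// For example (illustrative): for '#pragma omp target' whose body is a
/// single '#pragma omp teams distribute' loop, this returns the nested
/// 'teams distribute' directive; for 'target parallel' and similar combined
/// directives it returns nullptr.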
8890 static const OMPExecutableDirective *
8891 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8892 const auto *CS = D.getInnermostCapturedStmt();
8893 const auto *Body =
8894 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8895 const Stmt *ChildStmt =
8896 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8898 if (const auto *NestedDir =
8899 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8900 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8901 switch (D.getDirectiveKind()) {
8902 case OMPD_target:
8903 // For now, treat 'target' with nested 'teams loop' as if it's
8904 // distributed (target teams distribute).
8905 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8906 return NestedDir;
8907 if (DKind == OMPD_teams) {
8908 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8909 /*IgnoreCaptured=*/true);
8910 if (!Body)
8911 return nullptr;
8912 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8913 if (const auto *NND =
8914 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8915 DKind = NND->getDirectiveKind();
8916 if (isOpenMPDistributeDirective(DKind))
8917 return NND;
8920 return nullptr;
8921 case OMPD_target_teams:
8922 if (isOpenMPDistributeDirective(DKind))
8923 return NestedDir;
8924 return nullptr;
8925 case OMPD_target_parallel:
8926 case OMPD_target_simd:
8927 case OMPD_target_parallel_for:
8928 case OMPD_target_parallel_for_simd:
8929 return nullptr;
8930 case OMPD_target_teams_distribute:
8931 case OMPD_target_teams_distribute_simd:
8932 case OMPD_target_teams_distribute_parallel_for:
8933 case OMPD_target_teams_distribute_parallel_for_simd:
8934 case OMPD_parallel:
8935 case OMPD_for:
8936 case OMPD_parallel_for:
8937 case OMPD_parallel_master:
8938 case OMPD_parallel_sections:
8939 case OMPD_for_simd:
8940 case OMPD_parallel_for_simd:
8941 case OMPD_cancel:
8942 case OMPD_cancellation_point:
8943 case OMPD_ordered:
8944 case OMPD_threadprivate:
8945 case OMPD_allocate:
8946 case OMPD_task:
8947 case OMPD_simd:
8948 case OMPD_tile:
8949 case OMPD_unroll:
8950 case OMPD_sections:
8951 case OMPD_section:
8952 case OMPD_single:
8953 case OMPD_master:
8954 case OMPD_critical:
8955 case OMPD_taskyield:
8956 case OMPD_barrier:
8957 case OMPD_taskwait:
8958 case OMPD_taskgroup:
8959 case OMPD_atomic:
8960 case OMPD_flush:
8961 case OMPD_depobj:
8962 case OMPD_scan:
8963 case OMPD_teams:
8964 case OMPD_target_data:
8965 case OMPD_target_exit_data:
8966 case OMPD_target_enter_data:
8967 case OMPD_distribute:
8968 case OMPD_distribute_simd:
8969 case OMPD_distribute_parallel_for:
8970 case OMPD_distribute_parallel_for_simd:
8971 case OMPD_teams_distribute:
8972 case OMPD_teams_distribute_simd:
8973 case OMPD_teams_distribute_parallel_for:
8974 case OMPD_teams_distribute_parallel_for_simd:
8975 case OMPD_target_update:
8976 case OMPD_declare_simd:
8977 case OMPD_declare_variant:
8978 case OMPD_begin_declare_variant:
8979 case OMPD_end_declare_variant:
8980 case OMPD_declare_target:
8981 case OMPD_end_declare_target:
8982 case OMPD_declare_reduction:
8983 case OMPD_declare_mapper:
8984 case OMPD_taskloop:
8985 case OMPD_taskloop_simd:
8986 case OMPD_master_taskloop:
8987 case OMPD_master_taskloop_simd:
8988 case OMPD_parallel_master_taskloop:
8989 case OMPD_parallel_master_taskloop_simd:
8990 case OMPD_requires:
8991 case OMPD_metadirective:
8992 case OMPD_unknown:
8993 default:
8994 llvm_unreachable("Unexpected directive.");
8998 return nullptr;
9001 /// Emit the user-defined mapper function. The code generation follows the
9002 /// pattern in the example below.
9003 /// \code
9004 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9005 /// void *base, void *begin,
9006 /// int64_t size, int64_t type,
9007 /// void *name = nullptr) {
9008 /// // Allocate space for an array section first or add a base/begin for
9009 /// // pointer dereference.
9010 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9011 /// !maptype.IsDelete)
9012 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9013 /// size*sizeof(Ty), clearToFromMember(type));
9014 /// // Map members.
9015 /// for (unsigned i = 0; i < size; i++) {
9016 /// // For each component specified by this mapper:
9017 /// for (auto c : begin[i]->all_components) {
9018 /// if (c.hasMapper())
9019 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9020 /// c.arg_type, c.arg_name);
9021 /// else
9022 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9023 /// c.arg_begin, c.arg_size, c.arg_type,
9024 /// c.arg_name);
9025 /// }
9026 /// }
9027 /// // Delete the array section.
9028 /// if (size > 1 && maptype.IsDelete)
9029 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9030 /// size*sizeof(Ty), clearToFromMember(type));
9031 /// }
9032 /// \endcode
9033 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9034 CodeGenFunction *CGF) {
9035 if (UDMMap.count(D) > 0)
9036 return;
9037 ASTContext &C = CGM.getContext();
9038 QualType Ty = D->getType();
9039 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9040 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9041 auto *MapperVarDecl =
9042 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9043 SourceLocation Loc = D->getLocation();
9044 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9045 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9047 // Prepare mapper function arguments and attributes.
9048 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9049 C.VoidPtrTy, ImplicitParamKind::Other);
9050 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9051 ImplicitParamKind::Other);
9052 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9053 C.VoidPtrTy, ImplicitParamKind::Other);
9054 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9055 ImplicitParamKind::Other);
9056 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9057 ImplicitParamKind::Other);
9058 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9059 ImplicitParamKind::Other);
9060 FunctionArgList Args;
9061 Args.push_back(&HandleArg);
9062 Args.push_back(&BaseArg);
9063 Args.push_back(&BeginArg);
9064 Args.push_back(&SizeArg);
9065 Args.push_back(&TypeArg);
9066 Args.push_back(&NameArg);
9067 const CGFunctionInfo &FnInfo =
9068 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9069 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9070 SmallString<64> TyStr;
9071 llvm::raw_svector_ostream Out(TyStr);
9072 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9073 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9074 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9075 Name, &CGM.getModule());
9076 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9077 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9078 // Start the mapper function code generation.
9079 CodeGenFunction MapperCGF(CGM);
9080 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9081 // Compute the starting and end addresses of array elements.
9082 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9083 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9084 C.getPointerType(Int64Ty), Loc);
9085   // Prepare common arguments for array initialization and deletion.
9086 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9087 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9088 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9089 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9090 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9091 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9092 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9093 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9094 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9095 // Convert the size in bytes into the number of array elements.
9096 Size = MapperCGF.Builder.CreateExactUDiv(
9097 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9098 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9099 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9100 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9101 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9102 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9103 C.getPointerType(Int64Ty), Loc);
9104 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9105 MapperCGF.GetAddrOfLocalVar(&NameArg),
9106 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9108   // Emit array initialization if this is an array section and \p MapType
9109   // indicates that memory allocation is required.
9110 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9111 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9112 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9114   // Emit a for loop to iterate through SizeArg elements and map all of them.
9116 // Emit the loop header block.
9117 MapperCGF.EmitBlock(HeadBB);
9118 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9119 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9120 // Evaluate whether the initial condition is satisfied.
9121 llvm::Value *IsEmpty =
9122 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9123 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9124 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9126 // Emit the loop body block.
9127 MapperCGF.EmitBlock(BodyBB);
9128 llvm::BasicBlock *LastBB = BodyBB;
9129 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9130 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9131 PtrPHI->addIncoming(PtrBegin, EntryBB);
9132 Address PtrCurrent(PtrPHI, ElemTy,
9133 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9134 .getAlignment()
9135 .alignmentOfArrayElement(ElementSize));
9136   // Privatize the mapper's declared variable to be the current array element.
9137 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9138 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9139 (void)Scope.Privatize();
9141 // Get map clause information. Fill up the arrays with all mapped variables.
9142 MappableExprsHandler::MapCombinedInfoTy Info;
9143 MappableExprsHandler MEHandler(*D, MapperCGF);
9144 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9146 // Call the runtime API __tgt_mapper_num_components to get the number of
9147 // pre-existing components.
9148 llvm::Value *OffloadingArgs[] = {Handle};
9149 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9150 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9151 OMPRTL___tgt_mapper_num_components),
9152 OffloadingArgs);
9153 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9154 PreviousSize,
9155 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
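  // Illustrative sketch (assuming MEMBER_OF occupies the high bits of the
  // 64-bit flags, with getFlagMemberOffset() giving its bit offset): with N
  // pre-existing components, ShiftedPreviousSize == N << offset, so the NUW
  // add below rebases each entry's MEMBER_OF index past those N components.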
9157 // Fill up the runtime mapper handle for all components.
9158 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9159 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9160 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9161 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9162 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9163 llvm::Value *CurSizeArg = Info.Sizes[I];
9164 llvm::Value *CurNameArg =
9165 (CGM.getCodeGenOpts().getDebugInfo() ==
9166 llvm::codegenoptions::NoDebugInfo)
9167 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9168 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9170 // Extract the MEMBER_OF field from the map type.
9171 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9172 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9173 Info.Types[I]));
9174 llvm::Value *MemberMapType =
9175 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9177 // Combine the map type inherited from user-defined mapper with that
9178 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9179 // bits of the \a MapType, which is the input argument of the mapper
9180 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9181 // bits of MemberMapType.
9182 // [OpenMP 5.0], 1.2.6. map-type decay.
9183 // | alloc | to | from | tofrom | release | delete
9184 // ----------------------------------------------------------
9185 // alloc | alloc | alloc | alloc | alloc | release | delete
9186 // to | alloc | to | alloc | to | release | delete
9187 // from | alloc | alloc | from | from | release | delete
9188 // tofrom | alloc | to | from | tofrom | release | delete
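    // For example (illustrative): if the mapper is invoked with a 'to' map
    // type and a member is declared 'tofrom', the branch below clears
    // OMP_MAP_FROM so the member decays to 'to', matching the table above.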
9189 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9190 MapType,
9191 MapperCGF.Builder.getInt64(
9192 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9193 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9194 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9195 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9196 llvm::BasicBlock *AllocElseBB =
9197 MapperCGF.createBasicBlock("omp.type.alloc.else");
9198 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9199 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9200 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9201 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9202 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9203 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9204 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9205 MapperCGF.EmitBlock(AllocBB);
9206 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9207 MemberMapType,
9208 MapperCGF.Builder.getInt64(
9209 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9210 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9211 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9212 MapperCGF.Builder.CreateBr(EndBB);
9213 MapperCGF.EmitBlock(AllocElseBB);
9214 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9215 LeftToFrom,
9216 MapperCGF.Builder.getInt64(
9217 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9218 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9219 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9220 // In case of to, clear OMP_MAP_FROM.
9221 MapperCGF.EmitBlock(ToBB);
9222 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9223 MemberMapType,
9224 MapperCGF.Builder.getInt64(
9225 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9226 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9227 MapperCGF.Builder.CreateBr(EndBB);
9228 MapperCGF.EmitBlock(ToElseBB);
9229 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9230 LeftToFrom,
9231 MapperCGF.Builder.getInt64(
9232 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9233 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9234 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9235 // In case of from, clear OMP_MAP_TO.
9236 MapperCGF.EmitBlock(FromBB);
9237 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9238 MemberMapType,
9239 MapperCGF.Builder.getInt64(
9240 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9241 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9242 // In case of tofrom, do nothing.
9243 MapperCGF.EmitBlock(EndBB);
9244 LastBB = EndBB;
9245 llvm::PHINode *CurMapType =
9246 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9247 CurMapType->addIncoming(AllocMapType, AllocBB);
9248 CurMapType->addIncoming(ToMapType, ToBB);
9249 CurMapType->addIncoming(FromMapType, FromBB);
9250 CurMapType->addIncoming(MemberMapType, ToElseBB);
9252 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9253 CurSizeArg, CurMapType, CurNameArg};
9254 if (Info.Mappers[I]) {
9255 // Call the corresponding mapper function.
9256 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9257 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9258       assert(MapperFunc && "Expected a valid mapper function to be available.");
9259 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9260 } else {
9261 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9262 // data structure.
9263 MapperCGF.EmitRuntimeCall(
9264 OMPBuilder.getOrCreateRuntimeFunction(
9265 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9266 OffloadingArgs);
9270 // Update the pointer to point to the next element that needs to be mapped,
9271 // and check whether we have mapped all elements.
9272 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9273 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9274 PtrPHI->addIncoming(PtrNext, LastBB);
9275 llvm::Value *IsDone =
9276 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9277 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9278 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9280 MapperCGF.EmitBlock(ExitBB);
9281 // Emit array deletion if this is an array section and \p MapType indicates
9282 // that deletion is required.
9283 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9284 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9286 // Emit the function exit block.
9287 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9288 MapperCGF.FinishFunction();
9289 UDMMap.try_emplace(D, Fn);
9290 if (CGF)
9291 FunctionUDMMap[CGF->CurFn].push_back(D);
9294 /// Emit the array initialization or deletion portion for user-defined mapper
9295 /// code generation. First, it evaluates whether an array section is mapped and
9296 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9297 /// true, and \a MapType indicates to not delete this array, array
9298 /// initialization code is generated. If \a IsInit is false, and \a MapType
9299 /// indicates to delete this array, array deletion code is generated.
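/// A sketch of the guard gating the push below (illustrative pseudo-C++
/// mirroring the emitted IR, not real code in this file):
/// \code
/// bool is_array = size > 1;
/// bool run = IsInit ? (is_array || (base != begin && maptype.IsPtrAndObj)) &&
///                         !maptype.IsDelete
///                   : is_array && maptype.IsDelete;
/// \endcode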
9300 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9301 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9302 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9303 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9304 bool IsInit) {
9305 StringRef Prefix = IsInit ? ".init" : ".del";
9307 // Evaluate if this is an array section.
9308 llvm::BasicBlock *BodyBB =
9309 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9310 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9311 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9312 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9313 MapType,
9314 MapperCGF.Builder.getInt64(
9315 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9316 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9317 llvm::Value *DeleteCond;
9318 llvm::Value *Cond;
9319 if (IsInit) {
9320 // base != begin?
9321 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9322 // IsPtrAndObj?
9323 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9324 MapType,
9325 MapperCGF.Builder.getInt64(
9326 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9327 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9328 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9329 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9330 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9331 DeleteCond = MapperCGF.Builder.CreateIsNull(
9332 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9333 } else {
9334 Cond = IsArray;
9335 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9336 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9338 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9339 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9341 MapperCGF.EmitBlock(BodyBB);
9342 // Get the array size by multiplying element size and element number (i.e., \p
9343 // Size).
9344 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9345 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9346   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it is used
9347   // for memory allocation/deletion purposes only.
9348 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9349 MapType,
9350 MapperCGF.Builder.getInt64(
9351 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9352 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9353 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9354 MapTypeArg = MapperCGF.Builder.CreateOr(
9355 MapTypeArg,
9356 MapperCGF.Builder.getInt64(
9357 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9358 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9360 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9361 // data structure.
9362 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9363 ArraySize, MapTypeArg, MapName};
9364 MapperCGF.EmitRuntimeCall(
9365 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9366 OMPRTL___tgt_push_mapper_component),
9367 OffloadingArgs);
9370 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9371 const OMPDeclareMapperDecl *D) {
9372 auto I = UDMMap.find(D);
9373 if (I != UDMMap.end())
9374 return I->second;
9375 emitUserDefinedMapper(D);
9376 return UDMMap.lookup(D);
9379 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9380 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9381 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9382 const OMPLoopDirective &D)>
9383 SizeEmitter) {
9384 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9385 const OMPExecutableDirective *TD = &D;
9386   // Get the nested teams distribute kind directive, if any. For now, treat
9387   // 'target_teams_loop' as if it were really a 'target_teams_distribute'.
9388 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9389 Kind != OMPD_target_teams_loop)
9390 TD = getNestedDistributeDirective(CGM.getContext(), D);
9391 if (!TD)
9392 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9394 const auto *LD = cast<OMPLoopDirective>(TD);
9395 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9396 return NumIterations;
9397 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9400 static void
9401 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9402 const OMPExecutableDirective &D,
9403 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9404 bool RequiresOuterTask, const CapturedStmt &CS,
9405 bool OffloadingMandatory, CodeGenFunction &CGF) {
9406 if (OffloadingMandatory) {
9407 CGF.Builder.CreateUnreachable();
9408 } else {
9409 if (RequiresOuterTask) {
9410 CapturedVars.clear();
9411 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9413 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9414 CapturedVars);
9418 static llvm::Value *emitDeviceID(
9419 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9420 CodeGenFunction &CGF) {
9421 // Emit device ID if any.
9422 llvm::Value *DeviceID;
9423 if (Device.getPointer()) {
9424 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9425 Device.getInt() == OMPC_DEVICE_device_num) &&
9426 "Expected device_num modifier.");
9427 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9428 DeviceID =
9429 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9430 } else {
9431 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9433 return DeviceID;
9436 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9437 CodeGenFunction &CGF) {
9438 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9440 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9441 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9442 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9443 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9444 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9445 /*isSigned=*/false);
9447 return DynCGroupMem;
9449 static void genMapInfoForCaptures(
9450 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9451 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9452 llvm::OpenMPIRBuilder &OMPBuilder,
9453 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9454 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9456 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9457 auto RI = CS.getCapturedRecordDecl()->field_begin();
9458 auto *CV = CapturedVars.begin();
9459 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9460 CE = CS.capture_end();
9461 CI != CE; ++CI, ++RI, ++CV) {
9462 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9463 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9465 // VLA sizes are passed to the outlined region by copy and do not have map
9466 // information associated.
9467 if (CI->capturesVariableArrayType()) {
9468 CurInfo.Exprs.push_back(nullptr);
9469 CurInfo.BasePointers.push_back(*CV);
9470 CurInfo.DevicePtrDecls.push_back(nullptr);
9471 CurInfo.DevicePointers.push_back(
9472 MappableExprsHandler::DeviceInfoTy::None);
9473 CurInfo.Pointers.push_back(*CV);
9474 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9475 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9476 // Copy to the device as an argument. No need to retrieve it.
9477 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9478 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9479 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9480 CurInfo.Mappers.push_back(nullptr);
9481 } else {
9482 // If we have any information in the map clause, we use it, otherwise we
9483 // just do a default mapping.
9484 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9485 if (!CI->capturesThis())
9486 MappedVarSet.insert(CI->getCapturedVar());
9487 else
9488 MappedVarSet.insert(nullptr);
9489 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9490 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9491 // Generate correct mapping for variables captured by reference in
9492 // lambdas.
9493 if (CI->capturesVariable())
9494 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9495 CurInfo, LambdaPointers);
9497     // We expect to have at least one element of information for this capture.
9498 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9499 "Non-existing map pointer for capture!");
9500 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9501 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9502 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9503 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9504 "Inconsistent map information sizes!");
9506 // If there is an entry in PartialStruct it means we have a struct with
9507 // individual members mapped. Emit an extra combined entry.
9508 if (PartialStruct.Base.isValid()) {
9509 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9510 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
9511 CI->capturesThis(), OMPBuilder, nullptr,
9512 /*NotTargetParams*/ false);
9515 // We need to append the results of this capture to what we already have.
9516 CombinedInfo.append(CurInfo);
9518   // Adjust MEMBER_OF flags for the lambda captures.
9519 MEHandler.adjustMemberOfForLambdaCaptures(
9520 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9521 CombinedInfo.Pointers, CombinedInfo.Types);
9523 static void
9524 genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9525 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9526 llvm::OpenMPIRBuilder &OMPBuilder,
9527 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9528 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9530 CodeGenModule &CGM = CGF.CGM;
9531   // Map any list items in a map clause that were not captured because they
9532   // weren't referenced within the construct.
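  // E.g. (illustrative): '#pragma omp target map(tofrom : a[0:n])' where 'a'
  // is never referenced in the region body still needs its map entries, and
  // they are generated here.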
9533 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9535 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9536     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9537   };
9538 if (CGM.getCodeGenOpts().getDebugInfo() !=
9539 llvm::codegenoptions::NoDebugInfo) {
9540 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9541 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9542 FillInfoMap);
9546 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9547 const CapturedStmt &CS,
9548 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9549 llvm::OpenMPIRBuilder &OMPBuilder,
9550 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9551 // Get mappable expression information.
9552 MappableExprsHandler MEHandler(D, CGF);
9553 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9555 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9556 MappedVarSet, CombinedInfo);
9557 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9560 template <typename ClauseTy>
9561 static void
9562 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9563 const OMPExecutableDirective &D,
9564 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9565 const auto *C = D.getSingleClause<ClauseTy>();
9566 assert(!C->varlist_empty() &&
9567 "ompx_bare requires explicit num_teams and thread_limit");
9568 CodeGenFunction::RunCleanupsScope Scope(CGF);
9569 for (auto *E : C->varlist()) {
9570 llvm::Value *V = CGF.EmitScalarExpr(E);
9571 Values.push_back(
9572 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9576 static void emitTargetCallKernelLaunch(
9577 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9578 const OMPExecutableDirective &D,
9579 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9580 const CapturedStmt &CS, bool OffloadingMandatory,
9581 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9582 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9583 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9584 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9585 const OMPLoopDirective &D)>
9586 SizeEmitter,
9587 CodeGenFunction &CGF, CodeGenModule &CGM) {
9588 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9590 // Fill up the arrays with all the captured variables.
9591 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9592 CGOpenMPRuntime::TargetDataInfo Info;
9593 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9595 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9596 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9598 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9599 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9600 CGF.VoidPtrTy, CGM.getPointerAlign());
9601 InputInfo.PointersArray =
9602 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9603 InputInfo.SizesArray =
9604 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9605 InputInfo.MappersArray =
9606 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9607 MapTypesArray = Info.RTArgs.MapTypesArray;
9608 MapNamesArray = Info.RTArgs.MapNamesArray;
9610 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9611 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9612 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9613 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9614 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9616 if (IsReverseOffloading) {
9617 // Reverse offloading is not supported, so just execute on the host.
9618 // FIXME: This fallback solution is incorrect since it ignores the
9619 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9620 // assert here and ensure SEMA emits an error.
9621 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9622 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9623 return;
9626 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9627 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9629 llvm::Value *BasePointersArray =
9630 InputInfo.BasePointersArray.emitRawPointer(CGF);
9631 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9632 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9633 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9635 auto &&EmitTargetCallFallbackCB =
9636 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9637 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9638 -> llvm::OpenMPIRBuilder::InsertPointTy {
9639 CGF.Builder.restoreIP(IP);
9640 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9641 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9642       return CGF.Builder.saveIP();
9643     };
9645 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9646 SmallVector<llvm::Value *, 3> NumTeams;
9647 SmallVector<llvm::Value *, 3> NumThreads;
9648 if (IsBare) {
9649 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
9650 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9651 NumThreads);
9652 } else {
9653 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9654 NumThreads.push_back(
9655 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9658 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9659 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9660 llvm::Value *NumIterations =
9661 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9662 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9663 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9664 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9666 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9667 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9668 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9670 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9671 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9672 DynCGGroupMem, HasNoWait);
9674 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
9675 OMPRuntime->getOMPBuilder().emitKernelLaunch(
9676 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
9677 RTLoc, AllocaIP);
9678 assert(AfterIP && "unexpected error creating kernel launch");
9679     CGF.Builder.restoreIP(*AfterIP);
9680   };
9682 if (RequiresOuterTask)
9683 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9684 else
9685 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9688 static void
9689 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9690 const OMPExecutableDirective &D,
9691 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9692 bool RequiresOuterTask, const CapturedStmt &CS,
9693 bool OffloadingMandatory, CodeGenFunction &CGF) {
9695 // Notify that the host version must be executed.
9696 auto &&ElseGen =
9697 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9698 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9699 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9700                                RequiresOuterTask, CS, OffloadingMandatory, CGF);
9701       };
9703 if (RequiresOuterTask) {
9704 CodeGenFunction::OMPTargetDataInfo InputInfo;
9705 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9706 } else {
9707 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9711 void CGOpenMPRuntime::emitTargetCall(
9712 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9713 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9714 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9715 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9716 const OMPLoopDirective &D)>
9717 SizeEmitter) {
9718 if (!CGF.HaveInsertPoint())
9719 return;
9721 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9722 CGM.getLangOpts().OpenMPOffloadMandatory;
9724 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9726 const bool RequiresOuterTask =
9727 D.hasClausesOfKind<OMPDependClause>() ||
9728 D.hasClausesOfKind<OMPNowaitClause>() ||
9729 D.hasClausesOfKind<OMPInReductionClause>() ||
9730 (CGM.getLangOpts().OpenMP >= 51 &&
9731 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9732 D.hasClausesOfKind<OMPThreadLimitClause>());
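// For example, a directive such as
//   #pragma omp target nowait depend(in : x)
// carries nowait/depend clauses, so RequiresOuterTask is true and the target
// call below is emitted inside an outer task via
// EmitOMPTargetTaskBasedDirective() rather than inline.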
9733 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9734 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9735 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9736 PrePostActionTy &) {
9737 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9739 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9741 CodeGenFunction::OMPTargetDataInfo InputInfo;
9742 llvm::Value *MapTypesArray = nullptr;
9743 llvm::Value *MapNamesArray = nullptr;
9745 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9746 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9747 OutlinedFnID, &InputInfo, &MapTypesArray,
9748 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9749 PrePostActionTy &) {
9750 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9751 RequiresOuterTask, CS, OffloadingMandatory,
9752 Device, OutlinedFnID, InputInfo, MapTypesArray,
9753 MapNamesArray, SizeEmitter, CGF, CGM);
9756 auto &&TargetElseGen =
9757 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9758 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9759 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9760 CS, OffloadingMandatory, CGF);
9763 // If we have a target function ID, it means that we need to support
9764 // offloading; otherwise, just execute on the host. We must execute on the
9765 // host regardless of the conditional in the if clause if, e.g., the user
9766 // does not specify target triples.
9767 if (OutlinedFnID) {
9768 if (IfCond) {
9769 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9770 } else {
9771 RegionCodeGenTy ThenRCG(TargetThenGen);
9772 ThenRCG(CGF);
9774 } else {
9775 RegionCodeGenTy ElseRCG(TargetElseGen);
9776 ElseRCG(CGF);
9780 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9781 StringRef ParentName) {
9782 if (!S)
9783 return;
9785 // Codegen OMP target directives that offload compute to the device.
9786 bool RequiresDeviceCodegen =
9787 isa<OMPExecutableDirective>(S) &&
9788 isOpenMPTargetExecutionDirective(
9789 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9791 if (RequiresDeviceCodegen) {
9792 const auto &E = *cast<OMPExecutableDirective>(S);
9794 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9795 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9797 // Is this a target region that should not be emitted as an entry point?
9798 // If so, just signal that we are done with this target region.
9799 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9800 return;
9802 switch (E.getDirectiveKind()) {
9803 case OMPD_target:
9804 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9805 cast<OMPTargetDirective>(E));
9806 break;
9807 case OMPD_target_parallel:
9808 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9809 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9810 break;
9811 case OMPD_target_teams:
9812 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9813 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9814 break;
9815 case OMPD_target_teams_distribute:
9816 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9817 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9818 break;
9819 case OMPD_target_teams_distribute_simd:
9820 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9821 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9822 break;
9823 case OMPD_target_parallel_for:
9824 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9825 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9826 break;
9827 case OMPD_target_parallel_for_simd:
9828 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9829 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9830 break;
9831 case OMPD_target_simd:
9832 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9833 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9834 break;
9835 case OMPD_target_teams_distribute_parallel_for:
9836 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9837 CGM, ParentName,
9838 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9839 break;
9840 case OMPD_target_teams_distribute_parallel_for_simd:
9841 CodeGenFunction::
9842 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9843 CGM, ParentName,
9844 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9845 break;
9846 case OMPD_target_teams_loop:
9847 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9848 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9849 break;
9850 case OMPD_target_parallel_loop:
9851 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9852 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9853 break;
9854 case OMPD_parallel:
9855 case OMPD_for:
9856 case OMPD_parallel_for:
9857 case OMPD_parallel_master:
9858 case OMPD_parallel_sections:
9859 case OMPD_for_simd:
9860 case OMPD_parallel_for_simd:
9861 case OMPD_cancel:
9862 case OMPD_cancellation_point:
9863 case OMPD_ordered:
9864 case OMPD_threadprivate:
9865 case OMPD_allocate:
9866 case OMPD_task:
9867 case OMPD_simd:
9868 case OMPD_tile:
9869 case OMPD_unroll:
9870 case OMPD_sections:
9871 case OMPD_section:
9872 case OMPD_single:
9873 case OMPD_master:
9874 case OMPD_critical:
9875 case OMPD_taskyield:
9876 case OMPD_barrier:
9877 case OMPD_taskwait:
9878 case OMPD_taskgroup:
9879 case OMPD_atomic:
9880 case OMPD_flush:
9881 case OMPD_depobj:
9882 case OMPD_scan:
9883 case OMPD_teams:
9884 case OMPD_target_data:
9885 case OMPD_target_exit_data:
9886 case OMPD_target_enter_data:
9887 case OMPD_distribute:
9888 case OMPD_distribute_simd:
9889 case OMPD_distribute_parallel_for:
9890 case OMPD_distribute_parallel_for_simd:
9891 case OMPD_teams_distribute:
9892 case OMPD_teams_distribute_simd:
9893 case OMPD_teams_distribute_parallel_for:
9894 case OMPD_teams_distribute_parallel_for_simd:
9895 case OMPD_target_update:
9896 case OMPD_declare_simd:
9897 case OMPD_declare_variant:
9898 case OMPD_begin_declare_variant:
9899 case OMPD_end_declare_variant:
9900 case OMPD_declare_target:
9901 case OMPD_end_declare_target:
9902 case OMPD_declare_reduction:
9903 case OMPD_declare_mapper:
9904 case OMPD_taskloop:
9905 case OMPD_taskloop_simd:
9906 case OMPD_master_taskloop:
9907 case OMPD_master_taskloop_simd:
9908 case OMPD_parallel_master_taskloop:
9909 case OMPD_parallel_master_taskloop_simd:
9910 case OMPD_requires:
9911 case OMPD_metadirective:
9912 case OMPD_unknown:
9913 default:
9914 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9916 return;
9919 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9920 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9921 return;
9923 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9924 return;
9927 // If this is a lambda function, look into its body.
9928 if (const auto *L = dyn_cast<LambdaExpr>(S))
9929 S = L->getBody();
9931 // Keep looking for target regions recursively.
9932 for (const Stmt *II : S->children())
9933 scanForTargetRegionsFunctions(II, ParentName);
9936 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9937 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9938 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9939 if (!DevTy)
9940 return false;
9941 // Do not emit device_type(nohost) functions for the host.
9942 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9943 return true;
9944 // Do not emit device_type(host) functions for the device.
9945 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9946 return true;
9947 return false;
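// For instance (illustrative), given
//   #pragma omp declare target device_type(nohost)
//   void device_only_fn();
//   #pragma omp end declare target
// device_only_fn() is assumed not to be emitted when compiling for the host,
// and the symmetric case holds for device_type(host) when compiling for the
// device.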
9950 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9951 // If emitting code for the host, we do not process FD here. Instead we do
9952 // the normal code generation.
9953 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9954 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9955 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9956 CGM.getLangOpts().OpenMPIsTargetDevice))
9957 return true;
9958 return false;
9961 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9962 // Try to detect target regions in the function.
9963 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9964 StringRef Name = CGM.getMangledName(GD);
9965 scanForTargetRegionsFunctions(FD->getBody(), Name);
9966 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9967 CGM.getLangOpts().OpenMPIsTargetDevice))
9968 return true;
9971 // Do not emit the function if it is not marked as declare target.
9972 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9973 AlreadyEmittedTargetDecls.count(VD) == 0;
9976 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9977 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9978 CGM.getLangOpts().OpenMPIsTargetDevice))
9979 return true;
9981 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9982 return false;
9984 // Check if there are Ctors/Dtors in this declaration and look for target
9985 // regions in it. We use the complete variant to produce the kernel name
9986 // mangling.
9987 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9988 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9989 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9990 StringRef ParentName =
9991 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9992 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9994 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9995 StringRef ParentName =
9996 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9997 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10001 // Do not emit the variable if it is not marked as declare target.
10002 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10003 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10004 cast<VarDecl>(GD.getDecl()));
10005 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10006 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10007 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10008 HasRequiresUnifiedSharedMemory)) {
10009 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10010 return true;
10012 return false;
10015 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10016 llvm::Constant *Addr) {
10017 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10018 !CGM.getLangOpts().OpenMPIsTargetDevice)
10019 return;
10021 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10022 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10024 // If this is an 'extern' declaration we defer to the canonical definition and
10025 // do not emit an offloading entry.
10026 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10027 VD->hasExternalStorage())
10028 return;
10030 if (!Res) {
10031 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10032 // Register non-target variables being emitted in device code (debug info
10033 // may cause this).
10034 StringRef VarName = CGM.getMangledName(VD);
10035 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10037 return;
10040 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10041 auto LinkageForVariable = [&VD, this]() {
10042 return CGM.getLLVMLinkageVarDefinition(VD);
10045 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10046 OMPBuilder.registerTargetGlobalVariable(
10047 convertCaptureClause(VD), convertDeviceClause(VD),
10048 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10049 VD->isExternallyVisible(),
10050 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10051 VD->getCanonicalDecl()->getBeginLoc()),
10052 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10053 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10054 CGM.getTypes().ConvertTypeForMem(
10055 CGM.getContext().getPointerType(VD->getType())),
10056 Addr);
10058 for (auto *ref : GeneratedRefs)
10059 CGM.addCompilerUsedGlobal(ref);
10062 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10063 if (isa<FunctionDecl>(GD.getDecl()) ||
10064 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10065 return emitTargetFunctions(GD);
10067 return emitTargetGlobalVariable(GD);
10070 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10071 for (const VarDecl *VD : DeferredGlobalVariables) {
10072 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10073 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10074 if (!Res)
10075 continue;
10076 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10077 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10078 !HasRequiresUnifiedSharedMemory) {
10079 CGM.EmitGlobal(VD);
10080 } else {
10081 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10082 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10083 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10084 HasRequiresUnifiedSharedMemory)) &&
10085 "Expected link clause or to clause with unified memory.");
10086 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10091 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10092 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10093 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10094 " Expected target-based directive.");
10097 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10098 for (const OMPClause *Clause : D->clauselists()) {
10099 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10100 HasRequiresUnifiedSharedMemory = true;
10101 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10102 } else if (const auto *AC =
10103 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10104 switch (AC->getAtomicDefaultMemOrderKind()) {
10105 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10106 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10107 break;
10108 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10109 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10110 break;
10111 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10112 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10113 break;
10114 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10115 break;
10121 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10122 return RequiresAtomicOrdering;
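// For example, a translation unit containing
//   #pragma omp requires atomic_default_mem_order(acq_rel)
// makes getDefaultMemoryOrdering() return AcquireRelease, which the atomic
// codegen can use when an 'atomic' construct carries no explicit
// memory-order clause.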
10125 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10126 LangAS &AS) {
10127 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10128 return false;
10129 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10130 switch (A->getAllocatorType()) {
10131 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10132 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10133 // Not supported; fall back to the default mem space.
10134 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10135 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10136 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10137 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10138 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10139 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10140 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10141 AS = LangAS::Default;
10142 return true;
10143 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10144 llvm_unreachable("Expected predefined allocator for the variables with the "
10145 "static storage.");
10147 return false;
10150 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10151 return HasRequiresUnifiedSharedMemory;
10154 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10155 CodeGenModule &CGM)
10156 : CGM(CGM) {
10157 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10158 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10159 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10163 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10164 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10165 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10168 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10169 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10170 return true;
10172 const auto *D = cast<FunctionDecl>(GD.getDecl());
10173 // Do not emit the function if it is marked as declare target, as it was
10174 // already emitted.
10175 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10176 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10177 if (auto *F = dyn_cast_or_null<llvm::Function>(
10178 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10179 return !F->isDeclaration();
10180 return false;
10182 return true;
10185 return !AlreadyEmittedTargetDecls.insert(D).second;
10188 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10189 const OMPExecutableDirective &D,
10190 SourceLocation Loc,
10191 llvm::Function *OutlinedFn,
10192 ArrayRef<llvm::Value *> CapturedVars) {
10193 if (!CGF.HaveInsertPoint())
10194 return;
10196 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10197 CodeGenFunction::RunCleanupsScope Scope(CGF);
10199 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10200 llvm::Value *Args[] = {
10201 RTLoc,
10202 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10203 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10204 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10205 RealArgs.append(std::begin(Args), std::end(Args));
10206 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10208 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10209 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10210 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10213 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10214 const Expr *NumTeams,
10215 const Expr *ThreadLimit,
10216 SourceLocation Loc) {
10217 if (!CGF.HaveInsertPoint())
10218 return;
10220 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10222 llvm::Value *NumTeamsVal =
10223 NumTeams
10224 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10225 CGF.CGM.Int32Ty, /* isSigned = */ true)
10226 : CGF.Builder.getInt32(0);
10228 llvm::Value *ThreadLimitVal =
10229 ThreadLimit
10230 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10231 CGF.CGM.Int32Ty, /* isSigned = */ true)
10232 : CGF.Builder.getInt32(0);
10234 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10235 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10236 ThreadLimitVal};
10237 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10238 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10239 PushNumTeamsArgs);
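// For example (sketch), for
//   #pragma omp teams num_teams(4) thread_limit(64)
// this emits
//   __kmpc_push_num_teams(&loc, gtid, 4, 64);
// which the runtime consults for the subsequent __kmpc_fork_teams call
// emitted by emitTeamsCall() above.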
10242 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10243 const Expr *ThreadLimit,
10244 SourceLocation Loc) {
10245 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10246 llvm::Value *ThreadLimitVal =
10247 ThreadLimit
10248 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10249 CGF.CGM.Int32Ty, /* isSigned = */ true)
10250 : CGF.Builder.getInt32(0);
10252 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10253 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10254 ThreadLimitVal};
10255 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10256 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10257 ThreadLimitArgs);
10260 void CGOpenMPRuntime::emitTargetDataCalls(
10261 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10262 const Expr *Device, const RegionCodeGenTy &CodeGen,
10263 CGOpenMPRuntime::TargetDataInfo &Info) {
10264 if (!CGF.HaveInsertPoint())
10265 return;
10267 // Action used to replace the default codegen action and turn privatization
10268 // off.
10269 PrePostActionTy NoPrivAction;
10271 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10273 llvm::Value *IfCondVal = nullptr;
10274 if (IfCond)
10275 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10277 // Emit device ID if any.
10278 llvm::Value *DeviceID = nullptr;
10279 if (Device) {
10280 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10281 CGF.Int64Ty, /*isSigned=*/true);
10282 } else {
10283 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10286 // Fill up the arrays with all the mapped variables.
10287 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10288 auto GenMapInfoCB =
10289 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10290 CGF.Builder.restoreIP(CodeGenIP);
10291 // Get map clause information.
10292 MappableExprsHandler MEHandler(D, CGF);
10293 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10295 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10296 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10298 if (CGM.getCodeGenOpts().getDebugInfo() !=
10299 llvm::codegenoptions::NoDebugInfo) {
10300 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10301 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10302 FillInfoMap);
10305 return CombinedInfo;
10307 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10308 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10309 CGF.Builder.restoreIP(CodeGenIP);
10310 switch (BodyGenType) {
10311 case BodyGenTy::Priv:
10312 if (!Info.CaptureDeviceAddrMap.empty())
10313 CodeGen(CGF);
10314 break;
10315 case BodyGenTy::DupNoPriv:
10316 if (!Info.CaptureDeviceAddrMap.empty()) {
10317 CodeGen.setAction(NoPrivAction);
10318 CodeGen(CGF);
10320 break;
10321 case BodyGenTy::NoPriv:
10322 if (Info.CaptureDeviceAddrMap.empty()) {
10323 CodeGen.setAction(NoPrivAction);
10324 CodeGen(CGF);
10326 break;
10328 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10329 CGF.Builder.GetInsertPoint());
10332 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10333 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10334 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10338 auto CustomMapperCB = [&](unsigned int I) {
10339 llvm::Value *MFunc = nullptr;
10340 if (CombinedInfo.Mappers[I]) {
10341 Info.HasMapper = true;
10342 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10343 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10345 return MFunc;
10348 // Source location for the ident struct
10349 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10351 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10352 CGF.AllocaInsertPt->getIterator());
10353 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10354 CGF.Builder.GetInsertPoint());
10355 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10356 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
10357 OMPBuilder.createTargetData(
10358 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10359 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc);
10360 assert(AfterIP && "unexpected error creating target data");
10361 CGF.Builder.restoreIP(*AfterIP);
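// Illustrative example (not from the original source): for
//   #pragma omp target data map(tofrom : a[0:n]) use_device_ptr(p)
// createTargetData() brackets the region with __tgt_target_data_begin_mapper
// and __tgt_target_data_end_mapper calls, and BodyCB emits the region body:
// the Priv phase runs with 'p' privatized to its device address (the
// CaptureDeviceAddrMap is non-empty), while the NoPriv/DupNoPriv phases run
// the body without privatization.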
10364 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10365 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10366 const Expr *Device) {
10367 if (!CGF.HaveInsertPoint())
10368 return;
10370 assert((isa<OMPTargetEnterDataDirective>(D) ||
10371 isa<OMPTargetExitDataDirective>(D) ||
10372 isa<OMPTargetUpdateDirective>(D)) &&
10373 "Expecting either target enter, exit data, or update directives.");
10375 CodeGenFunction::OMPTargetDataInfo InputInfo;
10376 llvm::Value *MapTypesArray = nullptr;
10377 llvm::Value *MapNamesArray = nullptr;
10378 // Generate the code for the opening of the data environment.
10379 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10380 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10381 // Emit device ID if any.
10382 llvm::Value *DeviceID = nullptr;
10383 if (Device) {
10384 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10385 CGF.Int64Ty, /*isSigned=*/true);
10386 } else {
10387 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10390 // Emit the number of elements in the offloading arrays.
10391 llvm::Constant *PointerNum =
10392 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10394 // Source location for the ident struct
10395 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10397 SmallVector<llvm::Value *, 13> OffloadingArgs(
10398 {RTLoc, DeviceID, PointerNum,
10399 InputInfo.BasePointersArray.emitRawPointer(CGF),
10400 InputInfo.PointersArray.emitRawPointer(CGF),
10401 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10402 InputInfo.MappersArray.emitRawPointer(CGF)});
10404 // Select the right runtime function call for each standalone
10405 // directive.
10406 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10407 RuntimeFunction RTLFn;
10408 switch (D.getDirectiveKind()) {
10409 case OMPD_target_enter_data:
10410 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10411 : OMPRTL___tgt_target_data_begin_mapper;
10412 break;
10413 case OMPD_target_exit_data:
10414 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10415 : OMPRTL___tgt_target_data_end_mapper;
10416 break;
10417 case OMPD_target_update:
10418 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10419 : OMPRTL___tgt_target_data_update_mapper;
10420 break;
10421 case OMPD_parallel:
10422 case OMPD_for:
10423 case OMPD_parallel_for:
10424 case OMPD_parallel_master:
10425 case OMPD_parallel_sections:
10426 case OMPD_for_simd:
10427 case OMPD_parallel_for_simd:
10428 case OMPD_cancel:
10429 case OMPD_cancellation_point:
10430 case OMPD_ordered:
10431 case OMPD_threadprivate:
10432 case OMPD_allocate:
10433 case OMPD_task:
10434 case OMPD_simd:
10435 case OMPD_tile:
10436 case OMPD_unroll:
10437 case OMPD_sections:
10438 case OMPD_section:
10439 case OMPD_single:
10440 case OMPD_master:
10441 case OMPD_critical:
10442 case OMPD_taskyield:
10443 case OMPD_barrier:
10444 case OMPD_taskwait:
10445 case OMPD_taskgroup:
10446 case OMPD_atomic:
10447 case OMPD_flush:
10448 case OMPD_depobj:
10449 case OMPD_scan:
10450 case OMPD_teams:
10451 case OMPD_target_data:
10452 case OMPD_distribute:
10453 case OMPD_distribute_simd:
10454 case OMPD_distribute_parallel_for:
10455 case OMPD_distribute_parallel_for_simd:
10456 case OMPD_teams_distribute:
10457 case OMPD_teams_distribute_simd:
10458 case OMPD_teams_distribute_parallel_for:
10459 case OMPD_teams_distribute_parallel_for_simd:
10460 case OMPD_declare_simd:
10461 case OMPD_declare_variant:
10462 case OMPD_begin_declare_variant:
10463 case OMPD_end_declare_variant:
10464 case OMPD_declare_target:
10465 case OMPD_end_declare_target:
10466 case OMPD_declare_reduction:
10467 case OMPD_declare_mapper:
10468 case OMPD_taskloop:
10469 case OMPD_taskloop_simd:
10470 case OMPD_master_taskloop:
10471 case OMPD_master_taskloop_simd:
10472 case OMPD_parallel_master_taskloop:
10473 case OMPD_parallel_master_taskloop_simd:
10474 case OMPD_target:
10475 case OMPD_target_simd:
10476 case OMPD_target_teams_distribute:
10477 case OMPD_target_teams_distribute_simd:
10478 case OMPD_target_teams_distribute_parallel_for:
10479 case OMPD_target_teams_distribute_parallel_for_simd:
10480 case OMPD_target_teams:
10481 case OMPD_target_parallel:
10482 case OMPD_target_parallel_for:
10483 case OMPD_target_parallel_for_simd:
10484 case OMPD_requires:
10485 case OMPD_metadirective:
10486 case OMPD_unknown:
10487 default:
10488 llvm_unreachable("Unexpected standalone target data directive.");
10489 break;
10491 if (HasNowait) {
10492 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10493 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10494 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10495 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10497 CGF.EmitRuntimeCall(
10498 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10499 OffloadingArgs);
10502 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10503 &MapNamesArray](CodeGenFunction &CGF,
10504 PrePostActionTy &) {
10505 // Fill up the arrays with all the mapped variables.
10506 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10507 CGOpenMPRuntime::TargetDataInfo Info;
10508 MappableExprsHandler MEHandler(D, CGF);
10509 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10510 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10511 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10513 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10514 D.hasClausesOfKind<OMPNowaitClause>();
10516 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10517 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10518 CGF.VoidPtrTy, CGM.getPointerAlign());
10519 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10520 CGM.getPointerAlign());
10521 InputInfo.SizesArray =
10522 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10523 InputInfo.MappersArray =
10524 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10525 MapTypesArray = Info.RTArgs.MapTypesArray;
10526 MapNamesArray = Info.RTArgs.MapNamesArray;
10527 if (RequiresOuterTask)
10528 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10529 else
10530 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10533 if (IfCond) {
10534 emitIfClause(CGF, IfCond, TargetThenGen,
10535 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10536 } else {
10537 RegionCodeGenTy ThenRCG(TargetThenGen);
10538 ThenRCG(CGF);
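// Example lowering (sketch): a standalone
//   #pragma omp target update from(a[0:n]) nowait
// selects __tgt_target_data_update_nowait_mapper above and, because of the
// nowait clause, appends the four null task-dependence arguments before the
// runtime call is emitted.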
10542 namespace {
10543 /// Kind of parameter in a function with 'declare simd' directive.
10544 enum ParamKindTy {
10545 Linear,
10546 LinearRef,
10547 LinearUVal,
10548 LinearVal,
10549 Uniform,
10550 Vector,
10552 /// Attribute set of the parameter.
10553 struct ParamAttrTy {
10554 ParamKindTy Kind = Vector;
10555 llvm::APSInt StrideOrArg;
10556 llvm::APSInt Alignment;
10557 bool HasVarStride = false;
10559 } // namespace
10561 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10562 ArrayRef<ParamAttrTy> ParamAttrs) {
10563 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10564 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10565 // argument of that clause. The VLEN value must be a power of 2.
10566 // Otherwise the notion of the function's "characteristic data type" (CDT)
10567 // is used to compute the vector length.
10568 // CDT is defined in the following order:
10569 // a) For non-void function, the CDT is the return type.
10570 // b) If the function has any non-uniform, non-linear parameters, then the
10571 // CDT is the type of the first such parameter.
10572 // c) If the CDT determined by a) or b) above is a struct, union, or class
10573 // type that is passed by value (except for a type that maps to the
10574 // built-in complex data type), the characteristic data type is int.
10575 // d) If none of the above three cases is applicable, the CDT is int.
10576 // The VLEN is then determined based on the CDT and the size of the vector
10577 // register of the ISA for which the current vector version is generated. The
10578 // VLEN is computed using the formula below:
10579 // VLEN = sizeof(vector_register) / sizeof(CDT),
10580 // where the vector register size is specified in section 3.2.1 Registers and
10581 // the Stack Frame of the original AMD64 ABI document.
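// Worked example (illustrative): for
//   #pragma omp declare simd
//   double add(double x, double y);
// the CDT is double (the return type), so an AVX (256-bit) variant gets
// VLEN = 256 / 64 = 4 lanes.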
10582 QualType RetType = FD->getReturnType();
10583 if (RetType.isNull())
10584 return 0;
10585 ASTContext &C = FD->getASTContext();
10586 QualType CDT;
10587 if (!RetType.isNull() && !RetType->isVoidType()) {
10588 CDT = RetType;
10589 } else {
10590 unsigned Offset = 0;
10591 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10592 if (ParamAttrs[Offset].Kind == Vector)
10593 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10594 ++Offset;
10596 if (CDT.isNull()) {
10597 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10598 if (ParamAttrs[I + Offset].Kind == Vector) {
10599 CDT = FD->getParamDecl(I)->getType();
10600 break;
10605 if (CDT.isNull())
10606 CDT = C.IntTy;
10607 CDT = CDT->getCanonicalTypeUnqualified();
10608 if (CDT->isRecordType() || CDT->isUnionType())
10609 CDT = C.IntTy;
10610 return C.getTypeSize(CDT);
10613 /// Mangle the parameter part of the vector function name according to the
10614 /// parameters' OpenMP classification. The mangling function is defined in
10615 /// section 4.5 of the AAVFABI (2021Q1).
10616 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10617 SmallString<256> Buffer;
10618 llvm::raw_svector_ostream Out(Buffer);
10619 for (const auto &ParamAttr : ParamAttrs) {
10620 switch (ParamAttr.Kind) {
10621 case Linear:
10622 Out << 'l';
10623 break;
10624 case LinearRef:
10625 Out << 'R';
10626 break;
10627 case LinearUVal:
10628 Out << 'U';
10629 break;
10630 case LinearVal:
10631 Out << 'L';
10632 break;
10633 case Uniform:
10634 Out << 'u';
10635 break;
10636 case Vector:
10637 Out << 'v';
10638 break;
10640 if (ParamAttr.HasVarStride)
10641 Out << "s" << ParamAttr.StrideOrArg;
10642 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10643 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10644 // Don't print the step value if it is not present or if it is
10645 // equal to 1.
10646 if (ParamAttr.StrideOrArg < 0)
10647 Out << 'n' << -ParamAttr.StrideOrArg;
10648 else if (ParamAttr.StrideOrArg != 1)
10649 Out << ParamAttr.StrideOrArg;
10652 if (!!ParamAttr.Alignment)
10653 Out << 'a' << ParamAttr.Alignment;
10656 return std::string(Out.str());
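// Mangling example (sketch): for
//   #pragma omp declare simd uniform(a) linear(i : 4)
//   void foo(float *a, int i, float x);
// the parameters mangle as 'u' (uniform a), "l4" (linear i with step 4) and
// 'v' (vector x), giving the parameter part "ul4v".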
10659 static void
10660 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10661 const llvm::APSInt &VLENVal,
10662 ArrayRef<ParamAttrTy> ParamAttrs,
10663 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10664 struct ISADataTy {
10665 char ISA;
10666 unsigned VecRegSize;
10668 ISADataTy ISAData[] = {
10670 'b', 128
10671 }, // SSE
10673 'c', 256
10674 }, // AVX
10676 'd', 256
10677 }, // AVX2
10679 'e', 512
10680 }, // AVX512
10682 llvm::SmallVector<char, 2> Masked;
10683 switch (State) {
10684 case OMPDeclareSimdDeclAttr::BS_Undefined:
10685 Masked.push_back('N');
10686 Masked.push_back('M');
10687 break;
10688 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10689 Masked.push_back('N');
10690 break;
10691 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10692 Masked.push_back('M');
10693 break;
10695 for (char Mask : Masked) {
10696 for (const ISADataTy &Data : ISAData) {
10697 SmallString<256> Buffer;
10698 llvm::raw_svector_ostream Out(Buffer);
10699 Out << "_ZGV" << Data.ISA << Mask;
10700 if (!VLENVal) {
10701 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10702 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10703 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10704 } else {
10705 Out << VLENVal;
10707 Out << mangleVectorParameters(ParamAttrs);
10708 Out << '_' << Fn->getName();
10709 Fn->addFnAttr(Out.str());
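// Example of the attributes produced (illustrative): for
//   #pragma omp declare simd notinbranch
//   double f(double x);
// with no simdlen, the CDT is double and the loops above add
//   _ZGVbN2v_f  (SSE,    128 / 64 = 2 lanes)
//   _ZGVcN4v_f  (AVX,    256 / 64 = 4 lanes)
//   _ZGVdN4v_f  (AVX2,   256 / 64 = 4 lanes)
//   _ZGVeN8v_f  (AVX512, 512 / 64 = 8 lanes)
// as function attributes; 'M' (masked) variants would be added for inbranch.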
10714 // These are the functions needed to mangle the names of the
10715 // vector functions generated by the compiler, according to the rules
10716 // defined in the "Vector Function ABI specifications for AArch64",
10717 // available at
10718 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10720 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10721 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10722 QT = QT.getCanonicalType();
10724 if (QT->isVoidType())
10725 return false;
10727 if (Kind == ParamKindTy::Uniform)
10728 return false;
10730 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10731 return false;
10733 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10734 !QT->isReferenceType())
10735 return false;
10737 return true;
10740 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10741 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10742 QT = QT.getCanonicalType();
10743 unsigned Size = C.getTypeSize(QT);
10745 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10746 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10747 return false;
10749 if (QT->isFloatingType())
10750 return true;
10752 if (QT->isIntegerType())
10753 return true;
10755 if (QT->isPointerType())
10756 return true;
10758 // TODO: Add support for complex types (section 3.1.2, item 2).
10760 return false;
10763 /// Computes the lane size (LS) of a return type or of an input parameter,
10764 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10765 /// TODO: Add support for references, section 3.2.1, item 1.
10766 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10767 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10768 QualType PTy = QT.getCanonicalType()->getPointeeType();
10769 if (getAArch64PBV(PTy, C))
10770 return C.getTypeSize(PTy);
10772 if (getAArch64PBV(QT, C))
10773 return C.getTypeSize(QT);
10775 return C.getTypeSize(C.getUIntPtrType());
10778 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10779 // signature of the scalar function, as defined in 3.2.2 of the
10780 // AAVFABI.
10781 static std::tuple<unsigned, unsigned, bool>
10782 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10783 QualType RetType = FD->getReturnType().getCanonicalType();
10785 ASTContext &C = FD->getASTContext();
10787 bool OutputBecomesInput = false;
10789 llvm::SmallVector<unsigned, 8> Sizes;
10790 if (!RetType->isVoidType()) {
10791 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10792 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10793 OutputBecomesInput = true;
10795 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10796 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10797 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10800 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10801 // The LS of a function parameter / return value can only be a power
10802 // of 2, starting from 8 bits, up to 128.
10803 assert(llvm::all_of(Sizes,
10804 [](unsigned Size) {
10805 return Size == 8 || Size == 16 || Size == 32 ||
10806 Size == 64 || Size == 128;
10807 }) &&
10808 "Invalid size");
10810 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10811 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10812 OutputBecomesInput);
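// Worked example (illustrative): for double f(float x) the lane sizes are
// {64, 32}, so NDS = 32 and WDS = 64, and OutputBecomesInput stays false
// because the return type is PBV.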
10815 // Function used to add the attribute. The parameter `VLEN` is
10816 // templated to allow the use of "x" when targeting scalable functions
10817 // for SVE.
10818 template <typename T>
10819 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10820 char ISA, StringRef ParSeq,
10821 StringRef MangledName, bool OutputBecomesInput,
10822 llvm::Function *Fn) {
10823 SmallString<256> Buffer;
10824 llvm::raw_svector_ostream Out(Buffer);
10825 Out << Prefix << ISA << LMask << VLEN;
10826 if (OutputBecomesInput)
10827 Out << "v";
10828 Out << ParSeq << "_" << MangledName;
10829 Fn->addFnAttr(Out.str());
10832 // Helper function to generate the Advanced SIMD names depending on
10833 // the value of the NDS when simdlen is not present.
10834 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10835 StringRef Prefix, char ISA,
10836 StringRef ParSeq, StringRef MangledName,
10837 bool OutputBecomesInput,
10838 llvm::Function *Fn) {
10839 switch (NDS) {
10840 case 8:
10841 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10842 OutputBecomesInput, Fn);
10843 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10844 OutputBecomesInput, Fn);
10845 break;
10846 case 16:
10847 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10848 OutputBecomesInput, Fn);
10849 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10850 OutputBecomesInput, Fn);
10851 break;
10852 case 32:
10853 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10854 OutputBecomesInput, Fn);
10855 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10856 OutputBecomesInput, Fn);
10857 break;
10858 case 64:
10859 case 128:
10860 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10861 OutputBecomesInput, Fn);
10862 break;
10863 default:
10864 llvm_unreachable("Scalar type is too wide.");
10868 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10869 static void emitAArch64DeclareSimdFunction(
10870 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10871 ArrayRef<ParamAttrTy> ParamAttrs,
10872 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10873 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10875 // Get basic data for building the vector signature.
10876 const auto Data = getNDSWDS(FD, ParamAttrs);
10877 const unsigned NDS = std::get<0>(Data);
10878 const unsigned WDS = std::get<1>(Data);
10879 const bool OutputBecomesInput = std::get<2>(Data);
10881 // Check the values provided via `simdlen` by the user.
10882 // 1. A `simdlen(1)` doesn't produce vector signatures,
10883 if (UserVLEN == 1) {
10884 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10885 DiagnosticsEngine::Warning,
10886 "The clause simdlen(1) has no effect when targeting aarch64.");
10887 CGM.getDiags().Report(SLoc, DiagID);
10888 return;
10891 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10892 // Advanced SIMD output.
10893 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10894 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10895 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10896 "power of 2 when targeting Advanced SIMD.");
10897 CGM.getDiags().Report(SLoc, DiagID);
10898 return;
10901 // 3. Section 3.4.1: SVE fixed length must obey the architectural
10902 // limits.
10903 if (ISA == 's' && UserVLEN != 0) {
10904 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10905 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10906 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10907 "lanes in the architectural constraints "
10908 "for SVE (min is 128-bit, max is "
10909 "2048-bit, by steps of 128-bit)");
10910 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10911 return;
10915 // Sort out parameter sequence.
10916 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10917 StringRef Prefix = "_ZGV";
10918 // Generate simdlen from user input (if any).
10919 if (UserVLEN) {
10920 if (ISA == 's') {
10921 // SVE generates only a masked function.
10922 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10923 OutputBecomesInput, Fn);
10924 } else {
10925 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10926 // Advanced SIMD generates one or two functions, depending on
10927 // the `[not]inbranch` clause.
10928 switch (State) {
10929 case OMPDeclareSimdDeclAttr::BS_Undefined:
10930 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10931 OutputBecomesInput, Fn);
10932 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10933 OutputBecomesInput, Fn);
10934 break;
10935 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10936 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10937 OutputBecomesInput, Fn);
10938 break;
10939 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10940 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10941 OutputBecomesInput, Fn);
10942 break;
10945 } else {
10946 // If no user simdlen is provided, follow the AAVFABI rules for
10947 // generating the vector length.
10948 if (ISA == 's') {
10949 // SVE, section 3.4.1, item 1.
10950 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10951 OutputBecomesInput, Fn);
10952 } else {
10953 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10954 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10955 // two vector names depending on the use of the clause
10956 // `[not]inbranch`.
10957 switch (State) {
10958 case OMPDeclareSimdDeclAttr::BS_Undefined:
10959 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10960 OutputBecomesInput, Fn);
10961 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10962 OutputBecomesInput, Fn);
10963 break;
10964 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10965 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10966 OutputBecomesInput, Fn);
10967 break;
10968 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10969 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10970 OutputBecomesInput, Fn);
10971 break;
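// Example names (sketch): for
//   #pragma omp declare simd notinbranch
//   double f(double x);
// with no simdlen, SVE (ISA 's') adds the scalable masked variant
// _ZGVsMxv_f, while Advanced SIMD (ISA 'n') uses NDS = 64 and adds
// _ZGVnN2v_f.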
10977 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10978 llvm::Function *Fn) {
10979 ASTContext &C = CGM.getContext();
10980 FD = FD->getMostRecentDecl();
10981 while (FD) {
10982 // Map params to their positions in function decl.
10983 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10984 if (isa<CXXMethodDecl>(FD))
10985 ParamPositions.try_emplace(FD, 0);
10986 unsigned ParamPos = ParamPositions.size();
10987 for (const ParmVarDecl *P : FD->parameters()) {
10988 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10989 ++ParamPos;
10991 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10992 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10993 // Mark uniform parameters.
10994 for (const Expr *E : Attr->uniforms()) {
10995 E = E->IgnoreParenImpCasts();
10996 unsigned Pos;
10997 if (isa<CXXThisExpr>(E)) {
10998 Pos = ParamPositions[FD];
10999 } else {
11000 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11001 ->getCanonicalDecl();
11002 auto It = ParamPositions.find(PVD);
11003 assert(It != ParamPositions.end() && "Function parameter not found");
11004 Pos = It->second;
11006 ParamAttrs[Pos].Kind = Uniform;
11008 // Get alignment info.
11009 auto *NI = Attr->alignments_begin();
11010 for (const Expr *E : Attr->aligneds()) {
11011 E = E->IgnoreParenImpCasts();
11012 unsigned Pos;
11013 QualType ParmTy;
11014 if (isa<CXXThisExpr>(E)) {
11015 Pos = ParamPositions[FD];
11016 ParmTy = E->getType();
11017 } else {
11018 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11019 ->getCanonicalDecl();
11020 auto It = ParamPositions.find(PVD);
11021 assert(It != ParamPositions.end() && "Function parameter not found");
11022 Pos = It->second;
11023 ParmTy = PVD->getType();
11025 ParamAttrs[Pos].Alignment =
11026 (*NI)
11027 ? (*NI)->EvaluateKnownConstInt(C)
11028 : llvm::APSInt::getUnsigned(
11029 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11030 .getQuantity());
11031 ++NI;
11033 // Mark linear parameters.
11034 auto *SI = Attr->steps_begin();
11035 auto *MI = Attr->modifiers_begin();
11036 for (const Expr *E : Attr->linears()) {
11037 E = E->IgnoreParenImpCasts();
11038 unsigned Pos;
11039 bool IsReferenceType = false;
11040 // Rescaling factor needed to compute the linear parameter
11041 // value in the mangled name.
11042 unsigned PtrRescalingFactor = 1;
11043 if (isa<CXXThisExpr>(E)) {
11044 Pos = ParamPositions[FD];
11045 auto *P = cast<PointerType>(E->getType());
11046 PtrRescalingFactor = CGM.getContext()
11047 .getTypeSizeInChars(P->getPointeeType())
11048 .getQuantity();
11049 } else {
11050 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11051 ->getCanonicalDecl();
11052 auto It = ParamPositions.find(PVD);
11053 assert(It != ParamPositions.end() && "Function parameter not found");
11054 Pos = It->second;
11055 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11056 PtrRescalingFactor = CGM.getContext()
11057 .getTypeSizeInChars(P->getPointeeType())
11058 .getQuantity();
11059 else if (PVD->getType()->isReferenceType()) {
11060 IsReferenceType = true;
11061 PtrRescalingFactor =
11062 CGM.getContext()
11063 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11064 .getQuantity();
11067 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11068 if (*MI == OMPC_LINEAR_ref)
11069 ParamAttr.Kind = LinearRef;
11070 else if (*MI == OMPC_LINEAR_uval)
11071 ParamAttr.Kind = LinearUVal;
11072 else if (IsReferenceType)
11073 ParamAttr.Kind = LinearVal;
11074 else
11075 ParamAttr.Kind = Linear;
11076 // Assuming a stride of 1, for `linear` without modifiers.
11077 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11078 if (*SI) {
11079 Expr::EvalResult Result;
11080 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11081 if (const auto *DRE =
11082 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11083 if (const auto *StridePVD =
11084 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11085 ParamAttr.HasVarStride = true;
11086 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11087 assert(It != ParamPositions.end() &&
11088 "Function parameter not found");
11089 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11092 } else {
11093 ParamAttr.StrideOrArg = Result.Val.getInt();
11096 // If we are using a linear clause on a pointer, we need to
11097 // rescale the value of linear_step with the byte size of the
11098 // pointee type.
11099 if (!ParamAttr.HasVarStride &&
11100 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11101 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11102 ++SI;
11103 ++MI;
11105 llvm::APSInt VLENVal;
11106 SourceLocation ExprLoc;
11107 const Expr *VLENExpr = Attr->getSimdlen();
11108 if (VLENExpr) {
11109 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11110 ExprLoc = VLENExpr->getExprLoc();
11112 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11113 if (CGM.getTriple().isX86()) {
11114 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11115 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11116 unsigned VLEN = VLENVal.getExtValue();
11117 StringRef MangledName = Fn->getName();
11118 if (CGM.getTarget().hasFeature("sve"))
11119 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11120 MangledName, 's', 128, Fn, ExprLoc);
11121 else if (CGM.getTarget().hasFeature("neon"))
11122 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11123 MangledName, 'n', 128, Fn, ExprLoc);
11126 FD = FD->getPreviousDecl();
11130 namespace {
11131 /// Cleanup action for doacross support.
11132 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11133 public:
11134 static const int DoacrossFinArgs = 2;
11136 private:
11137 llvm::FunctionCallee RTLFn;
11138 llvm::Value *Args[DoacrossFinArgs];
11140 public:
11141 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11142 ArrayRef<llvm::Value *> CallArgs)
11143 : RTLFn(RTLFn) {
11144 assert(CallArgs.size() == DoacrossFinArgs);
11145 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11147 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11148 if (!CGF.HaveInsertPoint())
11149 return;
11150 CGF.EmitRuntimeCall(RTLFn, Args);
11153 } // namespace
11155 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11156 const OMPLoopDirective &D,
11157 ArrayRef<Expr *> NumIterations) {
11158 if (!CGF.HaveInsertPoint())
11159 return;
11161 ASTContext &C = CGM.getContext();
11162 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11163 RecordDecl *RD;
11164 if (KmpDimTy.isNull()) {
11165 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11166 // kmp_int64 lo; // lower
11167 // kmp_int64 up; // upper
11168 // kmp_int64 st; // stride
11169 // };
11170 RD = C.buildImplicitRecord("kmp_dim");
11171 RD->startDefinition();
11172 addFieldToRecordDecl(C, RD, Int64Ty);
11173 addFieldToRecordDecl(C, RD, Int64Ty);
11174 addFieldToRecordDecl(C, RD, Int64Ty);
11175 RD->completeDefinition();
11176 KmpDimTy = C.getRecordType(RD);
11177 } else {
11178 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11180 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11181 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11182 ArraySizeModifier::Normal, 0);
11184 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11185 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11186 enum { LowerFD = 0, UpperFD, StrideFD };
11187 // Fill dims with data.
11188 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11189 LValue DimsLVal = CGF.MakeAddrLValue(
11190 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11191 // dims.upper = num_iterations;
11192 LValue UpperLVal = CGF.EmitLValueForField(
11193 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11194 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11195 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11196 Int64Ty, NumIterations[I]->getExprLoc());
11197 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11198 // dims.stride = 1;
11199 LValue StrideLVal = CGF.EmitLValueForField(
11200 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11201 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11202 StrideLVal);
11205 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11206 // kmp_int32 num_dims, struct kmp_dim * dims);
11207 llvm::Value *Args[] = {
11208 emitUpdateLocation(CGF, D.getBeginLoc()),
11209 getThreadID(CGF, D.getBeginLoc()),
11210 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11211 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11212 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11213 CGM.VoidPtrTy)};
11215 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11216 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11217 CGF.EmitRuntimeCall(RTLFn, Args);
11218 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11219 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11220 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11221 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11222 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11223 llvm::ArrayRef(FiniArgs));
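// Example lowering (illustrative):
//   #pragma omp for ordered(1)
//   for (int i = 1; i < n; ++i) {
//     #pragma omp ordered depend(sink : i - 1)
//     ... // use results of iteration i - 1
//     #pragma omp ordered depend(source)
//   }
// emits __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, &dims) here, the
// sink/source clauses become __kmpc_doacross_wait/__kmpc_doacross_post calls
// (see EmitDoacrossOrdered below), and the cleanup pushed above emits
// __kmpc_doacross_fini when the loop region ends.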
11226 template <typename T>
11227 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11228 const T *C, llvm::Value *ULoc,
11229 llvm::Value *ThreadID) {
11230 QualType Int64Ty =
11231 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11232 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11233 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11234 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11235 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11236 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11237 const Expr *CounterVal = C->getLoopData(I);
11238 assert(CounterVal);
11239 llvm::Value *CntVal = CGF.EmitScalarConversion(
11240 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11241 CounterVal->getExprLoc());
11242 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11243 /*Volatile=*/false, Int64Ty);
11245 llvm::Value *Args[] = {
11246 ULoc, ThreadID,
11247 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11248 llvm::FunctionCallee RTLFn;
11249 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11250 OMPDoacrossKind<T> ODK;
11251 if (ODK.isSource(C)) {
11252 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11253 OMPRTL___kmpc_doacross_post);
11254 } else {
11255 assert(ODK.isSink(C) && "Expect sink modifier.");
11256 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11257 OMPRTL___kmpc_doacross_wait);
11259 CGF.EmitRuntimeCall(RTLFn, Args);
11262 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11263 const OMPDependClause *C) {
11264 return EmitDoacrossOrdered<OMPDependClause>(
11265 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11266 getThreadID(CGF, C->getBeginLoc()));
11269 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11270 const OMPDoacrossClause *C) {
11271 return EmitDoacrossOrdered<OMPDoacrossClause>(
11272 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11273 getThreadID(CGF, C->getBeginLoc()));
11276 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11277 llvm::FunctionCallee Callee,
11278 ArrayRef<llvm::Value *> Args) const {
11279 assert(Loc.isValid() && "Outlined function call location must be valid.");
11280 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
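// If the callee is a function already known not to throw, emit the call with
// the nounwind call-site attribute; otherwise fall back to a plain runtime
// call.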
11282 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11283 if (Fn->doesNotThrow()) {
11284 CGF.EmitNounwindRuntimeCall(Fn, Args);
11285 return;
11286 }
11287 }
11288 CGF.EmitRuntimeCall(Callee, Args);
11289 }
11291 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11292 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11293 ArrayRef<llvm::Value *> Args) const {
11294 emitCall(CGF, Loc, OutlinedFn, Args);
11295 }
11297 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11298 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11299 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11300 HasEmittedDeclareTargetRegion = true;
11301 }
11303 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11304 const VarDecl *NativeParam,
11305 const VarDecl *TargetParam) const {
11306 return CGF.GetAddrOfLocalVar(NativeParam);
11307 }
11309 /// Return allocator value from expression, or return a null allocator (default
11310 /// when no allocator specified).
11311 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11312 const Expr *Allocator) {
11313 llvm::Value *AllocVal;
11314 if (Allocator) {
11315 AllocVal = CGF.EmitScalarExpr(Allocator);
11316 // According to the standard, the original allocator type is an enum
11317 // (integer). Convert to pointer type, if required.
11318 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11319 CGF.getContext().VoidPtrTy,
11320 Allocator->getExprLoc());
11321 } else {
11322 // If no allocator specified, it defaults to the null allocator.
11323 AllocVal = llvm::Constant::getNullValue(
11324 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11325 }
11326 return AllocVal;
11327 }
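// Usage sketch (illustrative code, not from this file): given
//   omp_allocator_handle_t al = omp_init_allocator(...);
//   #pragma omp allocate(x) allocator(al)
// `Allocator` is the `al` expression, converted here to the runtime's
// pointer-typed allocator handle; with no allocator clause the null value is
// passed and the runtime falls back to its default allocator.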
11329 /// Return the alignment from an allocate directive if present.
11330 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11331 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11333 if (!AllocateAlignment)
11334 return nullptr;
11336 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11337 }
11339 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11340 const VarDecl *VD) {
11341 if (!VD)
11342 return Address::invalid();
11343 Address UntiedAddr = Address::invalid();
11344 Address UntiedRealAddr = Address::invalid();
11345 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11346 if (It != FunctionToUntiedTaskStackMap.end()) {
11347 const UntiedLocalVarsAddressesMap &UntiedData =
11348 UntiedLocalVarsStack[It->second];
11349 auto I = UntiedData.find(VD);
11350 if (I != UntiedData.end()) {
11351 UntiedAddr = I->second.first;
11352 UntiedRealAddr = I->second.second;
11353 }
11354 }
11355 const VarDecl *CVD = VD->getCanonicalDecl();
11356 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11357 // Use the default allocation.
11358 if (!isAllocatableDecl(VD))
11359 return UntiedAddr;
11360 llvm::Value *Size;
11361 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11362 if (CVD->getType()->isVariablyModifiedType()) {
11363 Size = CGF.getTypeSize(CVD->getType());
11364 // Align the size: ((size + align - 1) / align) * align
11365 Size = CGF.Builder.CreateNUWAdd(
11366 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11367 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11368 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
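// Worked example of the rounding above: size = 10, align = 8 gives
// (10 + 7) / 8 * 8 = 16, i.e. the byte count is rounded up to the next
// multiple of the alignment (NUW add/mul, since a valid object size cannot
// wrap).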
11369 } else {
11370 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11371 Size = CGM.getSize(Sz.alignTo(Align));
11372 }
11373 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11374 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11375 const Expr *Allocator = AA->getAllocator();
11376 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11377 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11378 SmallVector<llvm::Value *, 4> Args;
11379 Args.push_back(ThreadID);
11380 if (Alignment)
11381 Args.push_back(Alignment);
11382 Args.push_back(Size);
11383 Args.push_back(AllocVal);
11384 llvm::omp::RuntimeFunction FnID =
11385 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11386 llvm::Value *Addr = CGF.EmitRuntimeCall(
11387 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11388 getName({CVD->getName(), ".void.addr"}));
11389 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11390 CGM.getModule(), OMPRTL___kmpc_free);
11391 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11392 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11393 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11394 if (UntiedAddr.isValid())
11395 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11397 // Cleanup action for allocate support.
11398 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11399 llvm::FunctionCallee RTLFn;
11400 SourceLocation::UIntTy LocEncoding;
11401 Address Addr;
11402 const Expr *AllocExpr;
11404 public:
11405 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11406 SourceLocation::UIntTy LocEncoding, Address Addr,
11407 const Expr *AllocExpr)
11408 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11409 AllocExpr(AllocExpr) {}
11410 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11411 if (!CGF.HaveInsertPoint())
11412 return;
11413 llvm::Value *Args[3];
11414 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11415 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11416 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11417 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11418 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11419 Args[2] = AllocVal;
11420 CGF.EmitRuntimeCall(RTLFn, Args);
11421 }
11422 };
11423 Address VDAddr =
11424 UntiedRealAddr.isValid()
11425 ? UntiedRealAddr
11426 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11427 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11428 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11429 VDAddr, Allocator);
11430 if (UntiedRealAddr.isValid())
11431 if (auto *Region =
11432 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11433 Region->emitUntiedSwitch(CGF);
11434 return VDAddr;
11435 }
11436 return UntiedAddr;
11437 }
11439 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11440 const VarDecl *VD) const {
11441 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11442 if (It == FunctionToUntiedTaskStackMap.end())
11443 return false;
11444 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11445 }
11447 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11448 CodeGenModule &CGM, const OMPLoopDirective &S)
11449 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11450 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11451 if (!NeedToPush)
11452 return;
11453 NontemporalDeclsSet &DS =
11454 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11455 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11456 for (const Stmt *Ref : C->private_refs()) {
11457 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11458 const ValueDecl *VD;
11459 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11460 VD = DRE->getDecl();
11461 } else {
11462 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11463 assert((ME->isImplicitCXXThis() ||
11464 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11465 "Expected member of current class.");
11466 VD = ME->getMemberDecl();
11468 DS.insert(VD);
11469 }
11470 }
11471 }
11473 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11474 if (!NeedToPush)
11475 return;
11476 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11477 }
11479 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11480 CodeGenFunction &CGF,
11481 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11482 std::pair<Address, Address>> &LocalVars)
11483 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11484 if (!NeedToPush)
11485 return;
11486 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11487 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11488 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11489 }
11491 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11492 if (!NeedToPush)
11493 return;
11494 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11495 }
11497 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11498 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11500 return llvm::any_of(
11501 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11502 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11503 }
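// Context sketch: decls enter NontemporalDeclsStack through the RAII above
// for directives such as
//   #pragma omp simd nontemporal(a, b)
// so that codegen can mark loads and stores of those variables as
// nontemporal.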
11505 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11506 const OMPExecutableDirective &S,
11507 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11508 const {
11509 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11510 // Vars in target/task regions must be excluded completely.
11511 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11512 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11513 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11514 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11515 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11516 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11517 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11518 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11519 }
11520 }
11521 // Exclude vars in private clauses.
11522 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11523 for (const Expr *Ref : C->varlist()) {
11524 if (!Ref->getType()->isScalarType())
11525 continue;
11526 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11527 if (!DRE)
11528 continue;
11529 NeedToCheckForLPCs.insert(DRE->getDecl());
11530 }
11531 }
11532 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11533 for (const Expr *Ref : C->varlist()) {
11534 if (!Ref->getType()->isScalarType())
11535 continue;
11536 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11537 if (!DRE)
11538 continue;
11539 NeedToCheckForLPCs.insert(DRE->getDecl());
11540 }
11541 }
11542 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11543 for (const Expr *Ref : C->varlist()) {
11544 if (!Ref->getType()->isScalarType())
11545 continue;
11546 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11547 if (!DRE)
11548 continue;
11549 NeedToCheckForLPCs.insert(DRE->getDecl());
11550 }
11551 }
11552 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11553 for (const Expr *Ref : C->varlist()) {
11554 if (!Ref->getType()->isScalarType())
11555 continue;
11556 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11557 if (!DRE)
11558 continue;
11559 NeedToCheckForLPCs.insert(DRE->getDecl());
11560 }
11561 }
11562 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11563 for (const Expr *Ref : C->varlist()) {
11564 if (!Ref->getType()->isScalarType())
11565 continue;
11566 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11567 if (!DRE)
11568 continue;
11569 NeedToCheckForLPCs.insert(DRE->getDecl());
11570 }
11571 }
11572 for (const Decl *VD : NeedToCheckForLPCs) {
11573 for (const LastprivateConditionalData &Data :
11574 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11575 if (Data.DeclToUniqueName.count(VD) > 0) {
11576 if (!Data.Disabled)
11577 NeedToAddForLPCsAsDisabled.insert(VD);
11578 break;
11579 }
11580 }
11581 }
11582 }
11584 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11585 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11586 : CGM(CGF.CGM),
11587 Action((CGM.getLangOpts().OpenMP >= 50 &&
11588 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11589 [](const OMPLastprivateClause *C) {
11590 return C->getKind() ==
11591 OMPC_LASTPRIVATE_conditional;
11592 }))
11593 ? ActionToDo::PushAsLastprivateConditional
11594 : ActionToDo::DoNotPush) {
11595 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11596 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11597 return;
11598 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11599 "Expected a push action.");
11600 LastprivateConditionalData &Data =
11601 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11602 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11603 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11604 continue;
11606 for (const Expr *Ref : C->varlist()) {
11607 Data.DeclToUniqueName.insert(std::make_pair(
11608 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11609 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11610 }
11611 }
11612 Data.IVLVal = IVLVal;
11613 Data.Fn = CGF.CurFn;
11614 }
11616 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11617 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11618 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11619 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11620 if (CGM.getLangOpts().OpenMP < 50)
11621 return;
11622 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11623 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11624 if (!NeedToAddForLPCsAsDisabled.empty()) {
11625 Action = ActionToDo::DisableLastprivateConditional;
11626 LastprivateConditionalData &Data =
11627 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11628 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11629 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11630 Data.Fn = CGF.CurFn;
11631 Data.Disabled = true;
11632 }
11633 }
11635 CGOpenMPRuntime::LastprivateConditionalRAII
11636 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11637 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11638 return LastprivateConditionalRAII(CGF, S);
11639 }
11641 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11642 if (CGM.getLangOpts().OpenMP < 50)
11643 return;
11644 if (Action == ActionToDo::DisableLastprivateConditional) {
11645 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11646 "Expected list of disabled private vars.");
11647 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11648 }
11649 if (Action == ActionToDo::PushAsLastprivateConditional) {
11650 assert(
11651 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11652 "Expected list of lastprivate conditional vars.");
11653 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11654 }
11655 }
11657 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11658 const VarDecl *VD) {
11659 ASTContext &C = CGM.getContext();
11660 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11661 QualType NewType;
11662 const FieldDecl *VDField;
11663 const FieldDecl *FiredField;
11664 LValue BaseLVal;
11665 auto VI = I->getSecond().find(VD);
11666 if (VI == I->getSecond().end()) {
11667 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11668 RD->startDefinition();
11669 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11670 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11671 RD->completeDefinition();
11672 NewType = C.getRecordType(RD);
11673 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11674 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11675 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11676 } else {
11677 NewType = std::get<0>(VI->getSecond());
11678 VDField = std::get<1>(VI->getSecond());
11679 FiredField = std::get<2>(VI->getSecond());
11680 BaseLVal = std::get<3>(VI->getSecond());
11681 }
11682 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
11684 CGF.EmitStoreOfScalar(
11685 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11686 FiredLVal);
11687 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11688 }
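// Shape of the implicit record built above (illustrative):
//   struct lastprivate.conditional { <type of VD> value; char Fired; };
// Fired starts at zero and is raised when an inner region assigns to the
// lastprivate conditional variable, letting the final update be skipped when
// nothing fired.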
11690 namespace {
11691 /// Checks if the lastprivate conditional variable is referenced in LHS.
11692 class LastprivateConditionalRefChecker final
11693 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11694 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11695 const Expr *FoundE = nullptr;
11696 const Decl *FoundD = nullptr;
11697 StringRef UniqueDeclName;
11698 LValue IVLVal;
11699 llvm::Function *FoundFn = nullptr;
11700 SourceLocation Loc;
11702 public:
11703 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11704 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11705 llvm::reverse(LPM)) {
11706 auto It = D.DeclToUniqueName.find(E->getDecl());
11707 if (It == D.DeclToUniqueName.end())
11708 continue;
11709 if (D.Disabled)
11710 return false;
11711 FoundE = E;
11712 FoundD = E->getDecl()->getCanonicalDecl();
11713 UniqueDeclName = It->second;
11714 IVLVal = D.IVLVal;
11715 FoundFn = D.Fn;
11716 break;
11717 }
11718 return FoundE == E;
11719 }
11720 bool VisitMemberExpr(const MemberExpr *E) {
11721 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11722 return false;
11723 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11724 llvm::reverse(LPM)) {
11725 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11726 if (It == D.DeclToUniqueName.end())
11727 continue;
11728 if (D.Disabled)
11729 return false;
11730 FoundE = E;
11731 FoundD = E->getMemberDecl()->getCanonicalDecl();
11732 UniqueDeclName = It->second;
11733 IVLVal = D.IVLVal;
11734 FoundFn = D.Fn;
11735 break;
11736 }
11737 return FoundE == E;
11738 }
11739 bool VisitStmt(const Stmt *S) {
11740 for (const Stmt *Child : S->children()) {
11741 if (!Child)
11742 continue;
11743 if (const auto *E = dyn_cast<Expr>(Child))
11744 if (!E->isGLValue())
11745 continue;
11746 if (Visit(Child))
11747 return true;
11748 }
11749 return false;
11750 }
11751 explicit LastprivateConditionalRefChecker(
11752 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11753 : LPM(LPM) {}
11754 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11755 getFoundData() const {
11756 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11757 }
11758 };
11759 } // namespace
11761 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11762 LValue IVLVal,
11763 StringRef UniqueDeclName,
11764 LValue LVal,
11765 SourceLocation Loc) {
11766 // Last updated loop counter for the lastprivate conditional var.
11767 // int<xx> last_iv = 0;
11768 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11769 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11770 LLIVTy, getName({UniqueDeclName, "iv"}));
11771 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11772 IVLVal.getAlignment().getAsAlign());
11773 LValue LastIVLVal =
11774 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11776 // Last value of the lastprivate conditional.
11777 // decltype(priv_a) last_a;
11778 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11779 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11780 Last->setAlignment(LVal.getAlignment().getAsAlign());
11782 LValue LastLVal =
11783 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11785 // Global loop counter. Required to handle inner parallel-for regions.
11786 // iv
11787 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11789 // #pragma omp critical(a)
11790 // if (last_iv <= iv) {
11791 // last_iv = iv;
11792 // last_a = priv_a;
11793 // }
11794 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11795 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11796 Action.Enter(CGF);
11797 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11798 // (last_iv <= iv) ? Check if the variable is updated and store new
11799 // value in global var.
11800 llvm::Value *CmpRes;
11801 if (IVLVal.getType()->isSignedIntegerType()) {
11802 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11803 } else {
11804 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11805 "Loop iteration variable must be integer.");
11806 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11807 }
11808 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11809 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11810 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11811 // {
11812 CGF.EmitBlock(ThenBB);
11814 // last_iv = iv;
11815 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11817 // last_a = priv_a;
11818 switch (CGF.getEvaluationKind(LVal.getType())) {
11819 case TEK_Scalar: {
11820 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11821 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11822 break;
11823 }
11824 case TEK_Complex: {
11825 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11826 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11827 break;
11828 }
11829 case TEK_Aggregate:
11830 llvm_unreachable(
11831 "Aggregates are not supported in lastprivate conditional.");
11833 // }
11834 CGF.EmitBranch(ExitBB);
11835 // There is no need to emit line number for unconditional branch.
11836 (void)ApplyDebugLocation::CreateEmpty(CGF);
11837 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11838 };
11840 if (CGM.getLangOpts().OpenMPSimd) {
11841 // Do not emit as a critical region as no parallel region could be emitted.
11842 RegionCodeGenTy ThenRCG(CodeGen);
11843 ThenRCG(CGF);
11844 } else {
11845 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11846 }
11847 }
11849 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11850 const Expr *LHS) {
11851 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11852 return;
11853 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11854 if (!Checker.Visit(LHS))
11855 return;
11856 const Expr *FoundE;
11857 const Decl *FoundD;
11858 StringRef UniqueDeclName;
11859 LValue IVLVal;
11860 llvm::Function *FoundFn;
11861 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11862 Checker.getFoundData();
11863 if (FoundFn != CGF.CurFn) {
11864 // Special codegen for inner parallel regions.
11865 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11866 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11867 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11868 "Lastprivate conditional is not found in outer region.");
11869 QualType StructTy = std::get<0>(It->getSecond());
11870 const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
11871 LValue PrivLVal = CGF.EmitLValue(FoundE);
11872 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11873 PrivLVal.getAddress(),
11874 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11875 CGF.ConvertTypeForMem(StructTy));
11876 LValue BaseLVal =
11877 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11878 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11879 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11880 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11881 FiredLVal, llvm::AtomicOrdering::Unordered,
11882 /*IsVolatile=*/true, /*isInit=*/false);
11883 return;
11884 }
11886 // Private address of the lastprivate conditional in the current context.
11887 // priv_a
11888 LValue LVal = CGF.EmitLValue(FoundE);
11889 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11890 FoundE->getExprLoc());
11891 }
11893 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11894 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11895 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11896 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11897 return;
11898 auto Range = llvm::reverse(LastprivateConditionalStack);
11899 auto It = llvm::find_if(
11900 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11901 if (It == Range.end() || It->Fn != CGF.CurFn)
11902 return;
11903 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11904 assert(LPCI != LastprivateConditionalToTypes.end() &&
11905 "Lastprivates must be registered already.");
11906 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11907 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11908 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11909 for (const auto &Pair : It->DeclToUniqueName) {
11910 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11911 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11912 continue;
11913 auto I = LPCI->getSecond().find(Pair.first);
11914 assert(I != LPCI->getSecond().end() &&
11915 "Lastprivate must be rehistered already.");
11916 // bool Cmp = priv_a.Fired != 0;
11917 LValue BaseLVal = std::get<3>(I->getSecond());
11918 LValue FiredLVal =
11919 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11920 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11921 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11922 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11923 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11924 // if (Cmp) {
11925 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11926 CGF.EmitBlock(ThenBB);
11927 Address Addr = CGF.GetAddrOfLocalVar(VD);
11928 LValue LVal;
11929 if (VD->getType()->isReferenceType())
11930 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11931 AlignmentSource::Decl);
11932 else
11933 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11934 AlignmentSource::Decl);
11935 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11936 D.getBeginLoc());
11937 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11938 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
11939 // }
11940 }
11941 }
11943 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11944 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11945 SourceLocation Loc) {
11946 if (CGF.getLangOpts().OpenMP < 50)
11947 return;
11948 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11949 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11950 "Unknown lastprivate conditional variable.");
11951 StringRef UniqueName = It->second;
11952 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11953 // The variable was not updated in the region - exit.
11954 if (!GV)
11955 return;
11956 LValue LPLVal = CGF.MakeRawAddrLValue(
11957 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11958 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11959 CGF.EmitStoreOfScalar(Res, PrivLVal);
11960 }
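// Summary of the lastprivate(conditional:) protocol stitched together above
// (a hedged reading of this file, not normative): writes in nested regions
// raise the per-variable Fired flag; a critical section compares the global
// last-iteration counter and, for a later iteration, copies the value into an
// internal global; this final update then stores that global back into the
// original variable.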
11962 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11963 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11964 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11965 const RegionCodeGenTy &CodeGen) {
11966 llvm_unreachable("Not supported in SIMD-only mode");
11967 }
11969 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11970 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11971 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11972 const RegionCodeGenTy &CodeGen) {
11973 llvm_unreachable("Not supported in SIMD-only mode");
11974 }
11976 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11977 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11978 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11979 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11980 bool Tied, unsigned &NumberOfParts) {
11981 llvm_unreachable("Not supported in SIMD-only mode");
11982 }
11984 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11985 SourceLocation Loc,
11986 llvm::Function *OutlinedFn,
11987 ArrayRef<llvm::Value *> CapturedVars,
11988 const Expr *IfCond,
11989 llvm::Value *NumThreads) {
11990 llvm_unreachable("Not supported in SIMD-only mode");
11991 }
11993 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11994 CodeGenFunction &CGF, StringRef CriticalName,
11995 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11996 const Expr *Hint) {
11997 llvm_unreachable("Not supported in SIMD-only mode");
11998 }
12000 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12001 const RegionCodeGenTy &MasterOpGen,
12002 SourceLocation Loc) {
12003 llvm_unreachable("Not supported in SIMD-only mode");
12004 }
12006 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12007 const RegionCodeGenTy &MasterOpGen,
12008 SourceLocation Loc,
12009 const Expr *Filter) {
12010 llvm_unreachable("Not supported in SIMD-only mode");
12011 }
12013 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12014 SourceLocation Loc) {
12015 llvm_unreachable("Not supported in SIMD-only mode");
12016 }
12018 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12019 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12020 SourceLocation Loc) {
12021 llvm_unreachable("Not supported in SIMD-only mode");
12022 }
12024 void CGOpenMPSIMDRuntime::emitSingleRegion(
12025 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12026 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12027 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12028 ArrayRef<const Expr *> AssignmentOps) {
12029 llvm_unreachable("Not supported in SIMD-only mode");
12030 }
12032 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12033 const RegionCodeGenTy &OrderedOpGen,
12034 SourceLocation Loc,
12035 bool IsThreads) {
12036 llvm_unreachable("Not supported in SIMD-only mode");
12037 }
12039 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12040 SourceLocation Loc,
12041 OpenMPDirectiveKind Kind,
12042 bool EmitChecks,
12043 bool ForceSimpleCall) {
12044 llvm_unreachable("Not supported in SIMD-only mode");
12045 }
12047 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12048 CodeGenFunction &CGF, SourceLocation Loc,
12049 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12050 bool Ordered, const DispatchRTInput &DispatchValues) {
12051 llvm_unreachable("Not supported in SIMD-only mode");
12052 }
12054 void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12055 SourceLocation Loc) {
12056 llvm_unreachable("Not supported in SIMD-only mode");
12057 }
12059 void CGOpenMPSIMDRuntime::emitForStaticInit(
12060 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12061 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12062 llvm_unreachable("Not supported in SIMD-only mode");
12063 }
12065 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12066 CodeGenFunction &CGF, SourceLocation Loc,
12067 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12068 llvm_unreachable("Not supported in SIMD-only mode");
12069 }
12071 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12072 SourceLocation Loc,
12073 unsigned IVSize,
12074 bool IVSigned) {
12075 llvm_unreachable("Not supported in SIMD-only mode");
12076 }
12078 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12079 SourceLocation Loc,
12080 OpenMPDirectiveKind DKind) {
12081 llvm_unreachable("Not supported in SIMD-only mode");
12082 }
12084 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12085 SourceLocation Loc,
12086 unsigned IVSize, bool IVSigned,
12087 Address IL, Address LB,
12088 Address UB, Address ST) {
12089 llvm_unreachable("Not supported in SIMD-only mode");
12090 }
12092 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12093 llvm::Value *NumThreads,
12094 SourceLocation Loc) {
12095 llvm_unreachable("Not supported in SIMD-only mode");
12096 }
12098 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12099 ProcBindKind ProcBind,
12100 SourceLocation Loc) {
12101 llvm_unreachable("Not supported in SIMD-only mode");
12102 }
12104 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12105 const VarDecl *VD,
12106 Address VDAddr,
12107 SourceLocation Loc) {
12108 llvm_unreachable("Not supported in SIMD-only mode");
12109 }
12111 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12112 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12113 CodeGenFunction *CGF) {
12114 llvm_unreachable("Not supported in SIMD-only mode");
12115 }
12117 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12118 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12119 llvm_unreachable("Not supported in SIMD-only mode");
12120 }
12122 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12123 ArrayRef<const Expr *> Vars,
12124 SourceLocation Loc,
12125 llvm::AtomicOrdering AO) {
12126 llvm_unreachable("Not supported in SIMD-only mode");
12127 }
12129 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12130 const OMPExecutableDirective &D,
12131 llvm::Function *TaskFunction,
12132 QualType SharedsTy, Address Shareds,
12133 const Expr *IfCond,
12134 const OMPTaskDataTy &Data) {
12135 llvm_unreachable("Not supported in SIMD-only mode");
12136 }
12138 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12139 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12140 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12141 const Expr *IfCond, const OMPTaskDataTy &Data) {
12142 llvm_unreachable("Not supported in SIMD-only mode");
12143 }
12145 void CGOpenMPSIMDRuntime::emitReduction(
12146 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12147 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12148 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12149 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12150 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12151 ReductionOps, Options);
12152 }
12154 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12155 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12156 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12157 llvm_unreachable("Not supported in SIMD-only mode");
12158 }
12160 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12161 SourceLocation Loc,
12162 bool IsWorksharingReduction) {
12163 llvm_unreachable("Not supported in SIMD-only mode");
12164 }
12166 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12167 SourceLocation Loc,
12168 ReductionCodeGen &RCG,
12169 unsigned N) {
12170 llvm_unreachable("Not supported in SIMD-only mode");
12171 }
12173 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12174 SourceLocation Loc,
12175 llvm::Value *ReductionsPtr,
12176 LValue SharedLVal) {
12177 llvm_unreachable("Not supported in SIMD-only mode");
12178 }
12180 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12181 SourceLocation Loc,
12182 const OMPTaskDataTy &Data) {
12183 llvm_unreachable("Not supported in SIMD-only mode");
12184 }
12186 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12187 CodeGenFunction &CGF, SourceLocation Loc,
12188 OpenMPDirectiveKind CancelRegion) {
12189 llvm_unreachable("Not supported in SIMD-only mode");
12190 }
12192 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12193 SourceLocation Loc, const Expr *IfCond,
12194 OpenMPDirectiveKind CancelRegion) {
12195 llvm_unreachable("Not supported in SIMD-only mode");
12196 }
12198 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12199 const OMPExecutableDirective &D, StringRef ParentName,
12200 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12201 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12202 llvm_unreachable("Not supported in SIMD-only mode");
12203 }
12205 void CGOpenMPSIMDRuntime::emitTargetCall(
12206 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12207 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12208 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12209 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12210 const OMPLoopDirective &D)>
12211 SizeEmitter) {
12212 llvm_unreachable("Not supported in SIMD-only mode");
12213 }
12215 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12216 llvm_unreachable("Not supported in SIMD-only mode");
12217 }
12219 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12220 llvm_unreachable("Not supported in SIMD-only mode");
12221 }
12223 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12224 return false;
12225 }
12227 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12228 const OMPExecutableDirective &D,
12229 SourceLocation Loc,
12230 llvm::Function *OutlinedFn,
12231 ArrayRef<llvm::Value *> CapturedVars) {
12232 llvm_unreachable("Not supported in SIMD-only mode");
12233 }
12235 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12236 const Expr *NumTeams,
12237 const Expr *ThreadLimit,
12238 SourceLocation Loc) {
12239 llvm_unreachable("Not supported in SIMD-only mode");
12240 }
12242 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12243 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12244 const Expr *Device, const RegionCodeGenTy &CodeGen,
12245 CGOpenMPRuntime::TargetDataInfo &Info) {
12246 llvm_unreachable("Not supported in SIMD-only mode");
12247 }
12249 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12250 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12251 const Expr *Device) {
12252 llvm_unreachable("Not supported in SIMD-only mode");
12253 }
12255 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12256 const OMPLoopDirective &D,
12257 ArrayRef<Expr *> NumIterations) {
12258 llvm_unreachable("Not supported in SIMD-only mode");
12259 }
12261 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12262 const OMPDependClause *C) {
12263 llvm_unreachable("Not supported in SIMD-only mode");
12264 }
12266 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12267 const OMPDoacrossClause *C) {
12268 llvm_unreachable("Not supported in SIMD-only mode");
12269 }
12271 const VarDecl *
12272 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12273 const VarDecl *NativeParam) const {
12274 llvm_unreachable("Not supported in SIMD-only mode");
12275 }
12277 Address
12278 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12279 const VarDecl *NativeParam,
12280 const VarDecl *TargetParam) const {
12281 llvm_unreachable("Not supported in SIMD-only mode");