//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
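
  // Illustrative sketch (not emitted verbatim): for an untied task whose body
  // contains two scheduling points, the action above effectively turns the
  // outlined task entry into a resumable state machine keyed on part_id:
  //
  //   switch (*part_id) {            // emitted by Enter()
  //   case 0: goto .untied.jmp.0;    // initial entry
  //   case 1: goto .untied.jmp.1;    // resume after first re-enqueue
  //   default: goto .untied.done.;
  //   }
  //
  // Each emitUntiedSwitch() stores the next case number into part_id,
  // re-enqueues the task via UntiedCodeGen (__kmpc_omp_task), and branches to
  // the return block, so the runtime can resume the task at the recorded
  // label later.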
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}

/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
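
// A minimal usage sketch (hypothetical caller, for illustration only):
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_atomic,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined CapturedStmtInfo active
//   } // destructor restores the previous CapturedStmtInfo and lambda state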
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
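
// Note the encoding above: the *_IMPL_* barrier kinds are supersets of the
// generic OMP_IDENT_BARRIER_IMPL bit, e.g. (values copied from the enum)
//   OMP_IDENT_BARRIER_IMPL_SECTIONS == 0xC0  == 0x40 | 0x80
//   OMP_IDENT_BARRIER_IMPL_SINGLE   == 0x140 == 0x40 | 0x100
// so the runtime can test the common OMP_IDENT_BARRIER_IMPL bit while still
// distinguishing which construct the implicit barrier belongs to. An explicit
// '#pragma omp barrier' instead carries OMP_IDENT_BARRIER_EXPL in the ident_t
// flags when the barrier call is emitted.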
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
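
// In the emitted IR an ident_t instance is a private constant, e.g.
// (illustrative, for a build without debug info):
//   @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
//   @1 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 2, i32 0, i32 22, ptr @.str }
// where flags == 2 is OMP_IDENT_KMPC (the IR builder always enables "C-mode")
// and reserved_3 holds the source-location string size.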
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
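
// Mapping sketch (assuming the usual clause lowering in this file):
// 'schedule(static)' with no chunk selects OMP_sch_static, 'schedule(static,
// C)' selects OMP_sch_static_chunked, and 'schedule(dynamic[, C])' selects
// OMP_sch_dynamic_chunked. Under an 'ordered' clause the OMP_ord_*
// counterparts (the OMP_sch_* value plus 32) are used instead, and the
// OpenMP 4.5 modifiers are OR'ed in as bit flags, e.g.
//   schedule(nonmonotonic: dynamic)
//     -> OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic.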
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
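
// Note on the protocol above: Exit() is registered as a NormalAndEHCleanup
// *before* the callback runs, so the post-action fires even if the region
// body throws; Enter() is not called here but by the specific codegen
// callback at the point where the region actually begins (the convention
// followed by RegionCodeGenTy users throughout this file).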
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }

  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
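
// The loop emitted above has this shape (illustrative, non-UDR case):
//
//   <entry>:             %isempty = icmp eq %dest.begin, %dest.end
//                        br %isempty, %omp.arrayinit.done, %omp.arrayinit.body
//   omp.arrayinit.body:  %cur = phi [%dest.begin, <entry>], [%next, body]
//                        <initialize element at %cur>
//                        %next = getelementptr <elem>, %cur, 1
//                        %done = icmp eq %next, %dest.end
//                        br %done, %omp.arrayinit.done, %omp.arrayinit.body
//   omp.arrayinit.done:  ...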
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in the current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
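
// Why the adjustment above is needed (illustrative): for
//   #pragma omp parallel for reduction(+ : a[1:n])
// the private copy holds only the section's elements, but the region body
// still indexes through the original base 'a'. Computing
//   priv_base = priv + (orig_base - section_begin)
// rebases the private pointer so that 'a[i]' inside the region resolves to
// the matching private element.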
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
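
// For example (illustrative), given
//   #pragma omp declare reduction(mymin : int :
//       omp_out = omp_out < omp_in ? omp_out : omp_in)
//       initializer(omp_priv = INT_MAX)
// this emits an internal function, typically named '.omp_combiner.', taking
// (int *restrict omp_out, int *restrict omp_in) whose body evaluates the
// combiner expression, and an analogous '.omp_initializer.' that seeds
// omp_priv. The exact names come from getName() and are platform-specific.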
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
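
// Example (illustrative): for a directive at line 3, column 5 of test.c
// inside function foo, the stream above produces
//   ";test.c;foo;3;5;;"
// which matches the psource format documented with IdentFieldIndex above.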
1359 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1360 SourceLocation Loc,
1361 unsigned Flags, bool EmitLoc) {
1362 uint32_t SrcLocStrSize;
1363 llvm::Constant *SrcLocStr;
1364 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1365 llvm::codegenoptions::NoDebugInfo) ||
1366 Loc.isInvalid()) {
1367 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1368 } else {
1369 std::string FunctionName;
1370 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1371 FunctionName = FD->getQualifiedNameAsString();
1372 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1373 const char *FileName = PLoc.getFilename();
1374 unsigned Line = PLoc.getLine();
1375 unsigned Column = PLoc.getColumn();
1376 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1377 Column, SrcLocStrSize);
1379 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1380 return OMPBuilder.getOrCreateIdent(
1381 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
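
// Illustrative IR shape (simplified, assumed) for the uncached path above:
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
// The call is placed at the service insertion point near the function entry,
// and the result is cached in OpenMPLocThreadIDMap so that later requests in
// the same function reuse %gtid instead of re-querying the runtime.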
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}
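
// Example of the source forms these helpers classify (illustrative):
//   #pragma omp declare target to(X) device_type(nohost)
//   int X;
// maps to OMPTargetGlobalVarEntryTo + OMPTargetDeviceClauseNoHost, while
//   #pragma omp declare target link(Y)
//   int Y;
// maps to OMPTargetGlobalVarEntryLink. The entry kinds mirror the clause
// spellings in the directive.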
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return Address::invalid();
  return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
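
// Illustrative call shape (simplified) for the non-TLS path above. Given
//   static int gv;
//   #pragma omp threadprivate(gv)
// each use of gv inside a parallel region is rewritten as, roughly,
//   %p = call ptr @__kmpc_threadprivate_cached(ptr @loc, i32 %gtid,
//            ptr @gv, i64 4, ptr @<mangled gv>.cache.)
// so every thread resolves its own copy through the shared cache variable
// created by getOrCreateThreadPrivateCache (the ".cache." name is an
// approximation of the generated suffix).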
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit an init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. This must be NULL:
    // the slot is reserved by the runtime, which currently asserts that the
    // parameter is always NULL.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
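
// Example (illustrative): for a C++ threadprivate variable of non-trivial
// type, e.g.
//   struct S { S(); ~S(); };
//   S tp;
//   #pragma omp threadprivate(tp)
// the code above synthesizes __kmpc_global_ctor_/__kmpc_global_dtor_ helpers
// and registers them once via
//   __kmpc_threadprivate_register(&loc, &tp, ctor, /*cctor=*/NULL, dtor);
// the copy-constructor slot is deliberately NULL, as noted above.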
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsTargetDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsTargetDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
  SmallString<128> Buffer, Out;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = Fn;
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    auto CtorEntryInfo = EntryInfo;
    CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
    OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
        CtorEntryInfo, Ctor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = Fn;
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    auto DtorEntryInfo = EntryInfo;
    DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
    OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
        DtorEntryInfo, Dtor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsTargetDevice;
}
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}
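
// Illustrative use (assumed source): a function marked
//   #pragma omp declare target indirect
//   void callee();
// gets a device-side global holding its address. The registered entry lets
// the runtime translate a host function pointer into the matching device
// function when that pointer is invoked inside a target region.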
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
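
// Example of the folding above (illustrative):
//   #pragma omp parallel if(0)
// constant-folds to false, so only ElseGen (the serialized path) is emitted
// and no omp_if.then/omp_if.else blocks are created. A non-constant
// condition such as if(n > 1) produces the full branch structure instead.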
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, that every data environment starts with a
    // new function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
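
// Illustrative expansion (simplified) of the two arms above for
//   #pragma omp parallel if(cond)
// then-arm: __kmpc_fork_call(&loc, <n-captured>, @outlined, ...captured);
// else-arm: __kmpc_serialized_parallel(&loc, gtid);
//           outlined(&gtid, &zero_bound, ...captured);
//           __kmpc_end_serialized_parallel(&loc, gtid);
// i.e. the serialized arm runs the same outlined body on the current thread.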
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in
// a temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
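
// Example (illustrative): for
//   #pragma omp critical(mylock) hint(omp_sync_hint_contended)
// the region is bracketed roughly as
//   __kmpc_critical_with_hint(&loc, gtid, &<gomp_critical_user_mylock lock>, hint);
//   /* body */
//   __kmpc_end_critical(&loc, gtid, &<gomp_critical_user_mylock lock>);
// Unnamed critical regions share a single global lock variable; the exact
// generated lock name is an approximation here.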
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
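
// Example (illustrative) of the emitted guard for
//   #pragma omp masked filter(2)
// which behaves like:
//   if (__kmpc_masked(&loc, gtid, 2)) {
//     /* masked region body */
//     __kmpc_end_masked(&loc, gtid);
//   }
// With no filter clause the filter value defaults to 0, i.e. the primary
// thread, matching the plain 'master' construct above.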
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
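
// Illustrative source-level sketch of the copyprivate protocol above:
//   int x;
//   #pragma omp single copyprivate(x)
//   x = compute();
// The executing thread sets did_it = 1, its &x is packed into the void*
// array, and __kmpc_copyprivate broadcasts the value by invoking the
// generated copy_func for the other threads once they reach the implicit
// barrier at the end of the construct.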
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
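
// Example (illustrative) of the cancellation-aware form: in a parallel
// region that contains '#pragma omp cancel parallel', a barrier lowers to,
// roughly,
//   if (__kmpc_cancel_barrier(&loc, gtid) != 0)
//     goto <exit from construct>;
// whereas a region without cancellation uses the plain __kmpc_barrier call.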
void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}
/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
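
// Example mappings (following the switch above, illustrative):
//   schedule(static)            -> OMP_sch_static
//   schedule(static, 4)         -> OMP_sch_static_chunked
//   schedule(dynamic) + ordered -> OMP_ord_dynamic_chunked
// Note that dynamic/guided are always treated as chunked; an omitted chunk
// later defaults to 1 rather than selecting a distinct schedule constant.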
/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier were specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier were specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
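
// Worked example of the OpenMP 5.0 default above (illustrative):
//   schedule(dynamic)           -> OMP_sch_dynamic_chunked | nonmonotonic
//   schedule(static)            -> OMP_sch_static (no modifier added)
//   schedule(monotonic: guided) -> OMP_sch_guided_chunked | monotonic
// The modifier bits are OR-ed into the schedule value consumed by
// __kmpc_dispatch_init / __kmpc_for_static_init.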
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
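
// Illustrative runtime call sequence (simplified) for
//   #pragma omp for schedule(dynamic, 4)
// the init above emits, roughly,
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1,
//                          /*chunk=*/4);
// and the loop then repeatedly calls __kmpc_dispatch_next_4 (see emitForNext
// below) until it returns 0, each call handing back one chunk's bounds.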
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2833 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2834 llvm::Value *NumThreads,
2835 SourceLocation Loc) {
2836 if (!CGF.HaveInsertPoint())
2837 return;
2838 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2839 llvm::Value *Args[] = {
2840 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2841 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2842 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2843 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2844 Args);
2845 }
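// Illustrative sketch: for
//   #pragma omp parallel num_threads(N)
// this emits, before the fork call, roughly
//   __kmpc_push_num_threads(&loc, tid, (kmp_int32)N);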
2847 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2848 ProcBindKind ProcBind,
2849 SourceLocation Loc) {
2850 if (!CGF.HaveInsertPoint())
2851 return;
2852 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2853 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2854 llvm::Value *Args[] = {
2855 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2856 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2857 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2858 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2859 Args);
2860 }
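// Illustrative sketch: for
//   #pragma omp parallel proc_bind(close)
// this emits roughly
//   __kmpc_push_proc_bind(&loc, tid, proc_bind_close);
// with the numeric proc_bind value taken from the runtime's enumeration.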
2862 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2863 SourceLocation Loc, llvm::AtomicOrdering AO) {
2864 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2865 OMPBuilder.createFlush(CGF.Builder);
2866 } else {
2867 if (!CGF.HaveInsertPoint())
2868 return;
2869 // Build call void __kmpc_flush(ident_t *loc)
2870 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2871 CGM.getModule(), OMPRTL___kmpc_flush),
2872 emitUpdateLocation(CGF, Loc));
2873   }
2874 }
2876 namespace {
2877 /// Indexes of fields for type kmp_task_t.
2878 enum KmpTaskTFields {
2879 /// List of shared variables.
2880 KmpTaskTShareds,
2881 /// Task routine.
2882 KmpTaskTRoutine,
2883 /// Partition id for the untied tasks.
2884 KmpTaskTPartId,
2885 /// Function with call of destructors for private variables.
2886 Data1,
2887 /// Task priority.
2888 Data2,
2889 /// (Taskloops only) Lower bound.
2890 KmpTaskTLowerBound,
2891 /// (Taskloops only) Upper bound.
2892 KmpTaskTUpperBound,
2893 /// (Taskloops only) Stride.
2894 KmpTaskTStride,
2895 /// (Taskloops only) Is last iteration flag.
2896 KmpTaskTLastIter,
2897 /// (Taskloops only) Reduction data.
2898 KmpTaskTReductions,
2899 };
2900 } // anonymous namespace
2902 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2903 // If we are in simd mode or there are no entries, we don't need to do
2904 // anything.
2905 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2906 return;
2908 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2909 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2910 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2911 SourceLocation Loc;
2912 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2913 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2914 E = CGM.getContext().getSourceManager().fileinfo_end();
2915 I != E; ++I) {
2916 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2917 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2918 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2919 I->getFirst(), EntryInfo.Line, 1);
2920 break;
2921         }
2922       }
2923     }
2924 switch (Kind) {
2925 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2926 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2927 DiagnosticsEngine::Error, "Offloading entry for target region in "
2928 "%0 is incorrect: either the "
2929 "address or the ID is invalid.");
2930 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2931 } break;
2932 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2933 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2934 DiagnosticsEngine::Error, "Offloading entry for declare target "
2935 "variable %0 is incorrect: the "
2936 "address is invalid.");
2937 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2938 } break;
2939 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2940 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2941 DiagnosticsEngine::Error,
2942 "Offloading entry for declare target variable is incorrect: the "
2943 "address is invalid.");
2944 CGM.getDiags().Report(DiagID);
2945 } break;
2946     }
2947   };
2949 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2950 }
2952 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2953 if (!KmpRoutineEntryPtrTy) {
2954     // Build the typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
2955 ASTContext &C = CGM.getContext();
2956 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2957 FunctionProtoType::ExtProtoInfo EPI;
2958 KmpRoutineEntryPtrQTy = C.getPointerType(
2959 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2960 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2961   }
2962 }
2964 namespace {
2965 struct PrivateHelpersTy {
2966 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2967 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2968 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2969 PrivateElemInit(PrivateElemInit) {}
2970 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2971 const Expr *OriginalRef = nullptr;
2972 const VarDecl *Original = nullptr;
2973 const VarDecl *PrivateCopy = nullptr;
2974 const VarDecl *PrivateElemInit = nullptr;
2975 bool isLocalPrivate() const {
2976 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2977   }
2978 };
2979 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2980 } // anonymous namespace
2982 static bool isAllocatableDecl(const VarDecl *VD) {
2983 const VarDecl *CVD = VD->getCanonicalDecl();
2984 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2985 return false;
2986 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2987   // With the default allocator and no allocator expression, the default
2987   // allocation is used.
2988 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2989 !AA->getAllocator());
2990 }
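// Illustrative example: isAllocatableDecl(VD) is true for
//   int x;
//   #pragma omp allocate(x) allocator(omp_large_cap_mem_alloc)
// and false when the default allocator is used with no allocator expression.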
2992 static RecordDecl *
2993 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2994 if (!Privates.empty()) {
2995 ASTContext &C = CGM.getContext();
2996 // Build struct .kmp_privates_t. {
2997 // /* private vars */
2998 // };
2999 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3000 RD->startDefinition();
3001 for (const auto &Pair : Privates) {
3002 const VarDecl *VD = Pair.second.Original;
3003 QualType Type = VD->getType().getNonReferenceType();
3004 // If the private variable is a local variable with lvalue ref type,
3005 // allocate the pointer instead of the pointee type.
3006 if (Pair.second.isLocalPrivate()) {
3007 if (VD->getType()->isLValueReferenceType())
3008 Type = C.getPointerType(Type);
3009 if (isAllocatableDecl(VD))
3010 Type = C.getPointerType(Type);
3012 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3013 if (VD->hasAttrs()) {
3014 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3015 E(VD->getAttrs().end());
3016 I != E; ++I)
3017 FD->addAttr(*I);
3018       }
3019     }
3020 RD->completeDefinition();
3021 return RD;
3022   }
3023 return nullptr;
3024 }
3026 static RecordDecl *
3027 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3028 QualType KmpInt32Ty,
3029 QualType KmpRoutineEntryPointerQTy) {
3030 ASTContext &C = CGM.getContext();
3031 // Build struct kmp_task_t {
3032 // void * shareds;
3033 // kmp_routine_entry_t routine;
3034 // kmp_int32 part_id;
3035 // kmp_cmplrdata_t data1;
3036 // kmp_cmplrdata_t data2;
3037   // For taskloops, additional fields:
3038 // kmp_uint64 lb;
3039 // kmp_uint64 ub;
3040 // kmp_int64 st;
3041 // kmp_int32 liter;
3042 // void * reductions;
3043 // };
3044 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3045 UD->startDefinition();
3046 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3047 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3048 UD->completeDefinition();
3049 QualType KmpCmplrdataTy = C.getRecordType(UD);
3050 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3051 RD->startDefinition();
3052 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3053 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3054 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3055 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3056 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3057 if (isOpenMPTaskLoopDirective(Kind)) {
3058 QualType KmpUInt64Ty =
3059 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3060 QualType KmpInt64Ty =
3061 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3062 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3063 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3064 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3065 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3066 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3067   }
3068 RD->completeDefinition();
3069 return RD;
3070 }
3072 static RecordDecl *
3073 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3074 ArrayRef<PrivateDataTy> Privates) {
3075 ASTContext &C = CGM.getContext();
3076 // Build struct kmp_task_t_with_privates {
3077 // kmp_task_t task_data;
3078 // .kmp_privates_t. privates;
3079 // };
3080 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3081 RD->startDefinition();
3082 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3083 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3084 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3085 RD->completeDefinition();
3086 return RD;
3087 }
3089 /// Emit a proxy function which accepts kmp_task_t as the second
3090 /// argument.
3091 /// \code
3092 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3093 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3094 /// For taskloops:
3095 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3096 /// tt->reductions, tt->shareds);
3097 /// return 0;
3098 /// }
3099 /// \endcode
3100 static llvm::Function *
3101 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3102 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3103 QualType KmpTaskTWithPrivatesPtrQTy,
3104 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3105 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3106 llvm::Value *TaskPrivatesMap) {
3107 ASTContext &C = CGM.getContext();
3108 FunctionArgList Args;
3109 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3110 ImplicitParamDecl::Other);
3111 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3112 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3113 ImplicitParamDecl::Other);
3114 Args.push_back(&GtidArg);
3115 Args.push_back(&TaskTypeArg);
3116 const auto &TaskEntryFnInfo =
3117 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3118 llvm::FunctionType *TaskEntryTy =
3119 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3120 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3121 auto *TaskEntry = llvm::Function::Create(
3122 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3123 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3124 TaskEntry->setDoesNotRecurse();
3125 CodeGenFunction CGF(CGM);
3126 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3127 Loc, Loc);
3129 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3130 // tt,
3131 // For taskloops:
3132 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3133 // tt->task_data.shareds);
3134 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3135 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3136 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3137 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3138 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3139 const auto *KmpTaskTWithPrivatesQTyRD =
3140 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3141 LValue Base =
3142 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3143 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3144 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3145 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3146 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3148 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3149 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3150 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3151 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3152 CGF.ConvertTypeForMem(SharedsPtrTy));
3154 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3155 llvm::Value *PrivatesParam;
3156 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3157 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3158 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3159 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3160 } else {
3161 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3162   }
3164 llvm::Value *CommonArgs[] = {
3165 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3166 CGF.Builder
3167 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3168 CGF.VoidPtrTy, CGF.Int8Ty)
3169 .getPointer()};
3170 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3171 std::end(CommonArgs));
3172 if (isOpenMPTaskLoopDirective(Kind)) {
3173 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3174 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3175 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3176 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3177 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3178 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3179 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3180 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3181 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3182 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3183 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3184 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3185 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3186 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3187 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3188 CallArgs.push_back(LBParam);
3189 CallArgs.push_back(UBParam);
3190 CallArgs.push_back(StParam);
3191 CallArgs.push_back(LIParam);
3192 CallArgs.push_back(RParam);
3193   }
3194 CallArgs.push_back(SharedsParam);
3196 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3197 CallArgs);
3198 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3199 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3200 CGF.FinishFunction();
3201 return TaskEntry;
3202 }
3204 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3205 SourceLocation Loc,
3206 QualType KmpInt32Ty,
3207 QualType KmpTaskTWithPrivatesPtrQTy,
3208 QualType KmpTaskTWithPrivatesQTy) {
3209 ASTContext &C = CGM.getContext();
3210 FunctionArgList Args;
3211 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3212 ImplicitParamDecl::Other);
3213 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3214 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3215 ImplicitParamDecl::Other);
3216 Args.push_back(&GtidArg);
3217 Args.push_back(&TaskTypeArg);
3218 const auto &DestructorFnInfo =
3219 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3220 llvm::FunctionType *DestructorFnTy =
3221 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3222 std::string Name =
3223 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3224 auto *DestructorFn =
3225 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3226 Name, &CGM.getModule());
3227 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3228 DestructorFnInfo);
3229 DestructorFn->setDoesNotRecurse();
3230 CodeGenFunction CGF(CGM);
3231 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3232 Args, Loc, Loc);
3234 LValue Base = CGF.EmitLoadOfPointerLValue(
3235 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3236 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3237 const auto *KmpTaskTWithPrivatesQTyRD =
3238 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3239 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3240 Base = CGF.EmitLValueForField(Base, *FI);
3241 for (const auto *Field :
3242 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3243 if (QualType::DestructionKind DtorKind =
3244 Field->getType().isDestructedType()) {
3245 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3246 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3247     }
3248   }
3249 CGF.FinishFunction();
3250 return DestructorFn;
3251 }
3253 /// Emit a privates mapping function for correct handling of private and
3254 /// firstprivate variables.
3255 /// \code
3256 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3257 /// **noalias priv1,..., <tyn> **noalias privn) {
3258 /// *priv1 = &.privates.priv1;
3259 /// ...;
3260 /// *privn = &.privates.privn;
3261 /// }
3262 /// \endcode
3263 static llvm::Value *
3264 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3265 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3266 ArrayRef<PrivateDataTy> Privates) {
3267 ASTContext &C = CGM.getContext();
3268 FunctionArgList Args;
3269 ImplicitParamDecl TaskPrivatesArg(
3270 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3271 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3272 ImplicitParamDecl::Other);
3273 Args.push_back(&TaskPrivatesArg);
3274 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3275 unsigned Counter = 1;
3276 for (const Expr *E : Data.PrivateVars) {
3277 Args.push_back(ImplicitParamDecl::Create(
3278 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3279 C.getPointerType(C.getPointerType(E->getType()))
3280 .withConst()
3281 .withRestrict(),
3282 ImplicitParamDecl::Other));
3283 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3284 PrivateVarsPos[VD] = Counter;
3285 ++Counter;
3286   }
3287 for (const Expr *E : Data.FirstprivateVars) {
3288 Args.push_back(ImplicitParamDecl::Create(
3289 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3290 C.getPointerType(C.getPointerType(E->getType()))
3291 .withConst()
3292 .withRestrict(),
3293 ImplicitParamDecl::Other));
3294 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3295 PrivateVarsPos[VD] = Counter;
3296 ++Counter;
3297   }
3298 for (const Expr *E : Data.LastprivateVars) {
3299 Args.push_back(ImplicitParamDecl::Create(
3300 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3301 C.getPointerType(C.getPointerType(E->getType()))
3302 .withConst()
3303 .withRestrict(),
3304 ImplicitParamDecl::Other));
3305 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3306 PrivateVarsPos[VD] = Counter;
3307 ++Counter;
3308   }
3309 for (const VarDecl *VD : Data.PrivateLocals) {
3310 QualType Ty = VD->getType().getNonReferenceType();
3311 if (VD->getType()->isLValueReferenceType())
3312 Ty = C.getPointerType(Ty);
3313 if (isAllocatableDecl(VD))
3314 Ty = C.getPointerType(Ty);
3315 Args.push_back(ImplicitParamDecl::Create(
3316 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3317 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3318 ImplicitParamDecl::Other));
3319 PrivateVarsPos[VD] = Counter;
3320 ++Counter;
3321   }
3322 const auto &TaskPrivatesMapFnInfo =
3323 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3324 llvm::FunctionType *TaskPrivatesMapTy =
3325 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3326 std::string Name =
3327 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3328 auto *TaskPrivatesMap = llvm::Function::Create(
3329 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3330 &CGM.getModule());
3331 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3332 TaskPrivatesMapFnInfo);
3333 if (CGM.getLangOpts().Optimize) {
3334 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3335 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3336 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3337   }
3338 CodeGenFunction CGF(CGM);
3339 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3340 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3342 // *privi = &.privates.privi;
3343 LValue Base = CGF.EmitLoadOfPointerLValue(
3344 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3345 TaskPrivatesArg.getType()->castAs<PointerType>());
3346 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3347 Counter = 0;
3348 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3349 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3350 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3351 LValue RefLVal =
3352 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3353 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3354 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3355 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3356 ++Counter;
3357   }
3358 CGF.FinishFunction();
3359 return TaskPrivatesMap;
3360 }
3362 /// Emit initialization for private variables in task-based directives.
3363 static void emitPrivatesInit(CodeGenFunction &CGF,
3364 const OMPExecutableDirective &D,
3365 Address KmpTaskSharedsPtr, LValue TDBase,
3366 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3367 QualType SharedsTy, QualType SharedsPtrTy,
3368 const OMPTaskDataTy &Data,
3369 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3370 ASTContext &C = CGF.getContext();
3371 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3372 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3373 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3374 ? OMPD_taskloop
3375 : OMPD_task;
3376 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3377 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3378 LValue SrcBase;
3379 bool IsTargetTask =
3380 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3381 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3382   // For target-based directives, skip the 4 firstprivate arrays BasePointersArray,
3383   // PointersArray, SizesArray, and MappersArray. The original variables for
3384   // these arrays are not captured, and their addresses are obtained explicitly.
3385 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3386 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3387 SrcBase = CGF.MakeAddrLValue(
3388 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3389 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3390 CGF.ConvertTypeForMem(SharedsTy)),
3391 SharedsTy);
3392   }
3393 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3394 for (const PrivateDataTy &Pair : Privates) {
3395 // Do not initialize private locals.
3396 if (Pair.second.isLocalPrivate()) {
3397 ++FI;
3398 continue;
3399     }
3400 const VarDecl *VD = Pair.second.PrivateCopy;
3401 const Expr *Init = VD->getAnyInitializer();
3402 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3403 !CGF.isTrivialInitializer(Init)))) {
3404 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3405 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3406 const VarDecl *OriginalVD = Pair.second.Original;
3407 // Check if the variable is the target-based BasePointersArray,
3408 // PointersArray, SizesArray, or MappersArray.
3409 LValue SharedRefLValue;
3410 QualType Type = PrivateLValue.getType();
3411 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3412 if (IsTargetTask && !SharedField) {
3413 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3414 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3415 cast<CapturedDecl>(OriginalVD->getDeclContext())
3416 ->getNumParams() == 0 &&
3417 isa<TranslationUnitDecl>(
3418 cast<CapturedDecl>(OriginalVD->getDeclContext())
3419 ->getDeclContext()) &&
3420 "Expected artificial target data variable.");
3421 SharedRefLValue =
3422 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3423 } else if (ForDup) {
3424 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3425 SharedRefLValue = CGF.MakeAddrLValue(
3426 SharedRefLValue.getAddress(CGF).withAlignment(
3427 C.getDeclAlign(OriginalVD)),
3428 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3429 SharedRefLValue.getTBAAInfo());
3430 } else if (CGF.LambdaCaptureFields.count(
3431 Pair.second.Original->getCanonicalDecl()) > 0 ||
3432 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3433 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3434 } else {
3435 // Processing for implicitly captured variables.
3436 InlinedOpenMPRegionRAII Region(
3437 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3438 /*HasCancel=*/false, /*NoInheritance=*/true);
3439 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3440       }
3441 if (Type->isArrayType()) {
3442 // Initialize firstprivate array.
3443 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3444 // Perform simple memcpy.
3445 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3446 } else {
3447 // Initialize firstprivate array using element-by-element
3448 // initialization.
3449 CGF.EmitOMPAggregateAssign(
3450 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3451 Type,
3452 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3453 Address SrcElement) {
3454 // Clean up any temporaries needed by the initialization.
3455 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3456 InitScope.addPrivate(Elem, SrcElement);
3457 (void)InitScope.Privatize();
3458 // Emit initialization for single element.
3459 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3460 CGF, &CapturesInfo);
3461 CGF.EmitAnyExprToMem(Init, DestElement,
3462 Init->getType().getQualifiers(),
3463 /*IsInitializer=*/false);
3464               });
3465         }
3466 } else {
3467 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3468 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3469 (void)InitScope.Privatize();
3470 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3471 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3472 /*capturedByInit=*/false);
3473       }
3474 } else {
3475 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3476     }
3477   }
3478 ++FI;
3479   }
3480 }
3482 /// Check if initialization of private copies (and hence a task duplication
3482 /// function) is required for taskloops.
3483 static bool checkInitIsRequired(CodeGenFunction &CGF,
3484 ArrayRef<PrivateDataTy> Privates) {
3485 bool InitRequired = false;
3486 for (const PrivateDataTy &Pair : Privates) {
3487 if (Pair.second.isLocalPrivate())
3488 continue;
3489 const VarDecl *VD = Pair.second.PrivateCopy;
3490 const Expr *Init = VD->getAnyInitializer();
3491 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3492 !CGF.isTrivialInitializer(Init));
3493 if (InitRequired)
3494 break;
3495   }
3496 return InitRequired;
3497 }
3500 /// Emit task_dup function (for initialization of
3501 /// private/firstprivate/lastprivate vars and last_iter flag)
3502 /// \code
3503 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3504 /// lastpriv) {
3505 /// // setup lastprivate flag
3506 /// task_dst->last = lastpriv;
3507 /// // could be constructor calls here...
3508 /// }
3509 /// \endcode
3510 static llvm::Value *
3511 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3512 const OMPExecutableDirective &D,
3513 QualType KmpTaskTWithPrivatesPtrQTy,
3514 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3515 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3516 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3517 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3518 ASTContext &C = CGM.getContext();
3519 FunctionArgList Args;
3520 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3521 KmpTaskTWithPrivatesPtrQTy,
3522 ImplicitParamDecl::Other);
3523 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3524 KmpTaskTWithPrivatesPtrQTy,
3525 ImplicitParamDecl::Other);
3526 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3527 ImplicitParamDecl::Other);
3528 Args.push_back(&DstArg);
3529 Args.push_back(&SrcArg);
3530 Args.push_back(&LastprivArg);
3531 const auto &TaskDupFnInfo =
3532 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3533 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3534 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3535 auto *TaskDup = llvm::Function::Create(
3536 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3537 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3538 TaskDup->setDoesNotRecurse();
3539 CodeGenFunction CGF(CGM);
3540 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3541 Loc);
3543 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3544 CGF.GetAddrOfLocalVar(&DstArg),
3545 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3546 // task_dst->liter = lastpriv;
3547 if (WithLastIter) {
3548 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3549 LValue Base = CGF.EmitLValueForField(
3550 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3551 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3552 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3553 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3554 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3555   }
3557 // Emit initial values for private copies (if any).
3558 assert(!Privates.empty());
3559 Address KmpTaskSharedsPtr = Address::invalid();
3560 if (!Data.FirstprivateVars.empty()) {
3561 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3562 CGF.GetAddrOfLocalVar(&SrcArg),
3563 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3564 LValue Base = CGF.EmitLValueForField(
3565 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3566 KmpTaskSharedsPtr = Address(
3567 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3568 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3569 KmpTaskTShareds)),
3570 Loc),
3571 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3572   }
3573 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3574 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3575 CGF.FinishFunction();
3576 return TaskDup;
3577 }
3579 /// Checks if destructor function is required to be generated.
3580 /// \return true if cleanups are required, false otherwise.
3581 static bool
3582 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3583 ArrayRef<PrivateDataTy> Privates) {
3584 for (const PrivateDataTy &P : Privates) {
3585 if (P.second.isLocalPrivate())
3586 continue;
3587 QualType Ty = P.second.Original->getType().getNonReferenceType();
3588 if (Ty.isDestructedType())
3589 return true;
3590   }
3591 return false;
3592 }
3594 namespace {
3595 /// Loop generator for OpenMP iterator expression.
3596 class OMPIteratorGeneratorScope final
3597 : public CodeGenFunction::OMPPrivateScope {
3598 CodeGenFunction &CGF;
3599 const OMPIteratorExpr *E = nullptr;
3600 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3601 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3602 OMPIteratorGeneratorScope() = delete;
3603 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3605 public:
3606 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3607 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3608 if (!E)
3609 return;
3610 SmallVector<llvm::Value *, 4> Uppers;
3611 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3612 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3613 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3614 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3615 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3616 addPrivate(
3617 HelperData.CounterVD,
3618 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3619     }
3620 Privatize();
3622 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3623 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3624 LValue CLVal =
3625 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3626 HelperData.CounterVD->getType());
3627 // Counter = 0;
3628 CGF.EmitStoreOfScalar(
3629 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3630 CLVal);
3631 CodeGenFunction::JumpDest &ContDest =
3632 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3633 CodeGenFunction::JumpDest &ExitDest =
3634 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3635       // N = <number-of-iterations>;
3636 llvm::Value *N = Uppers[I];
3637 // cont:
3638 // if (Counter < N) goto body; else goto exit;
3639 CGF.EmitBlock(ContDest.getBlock());
3640 auto *CVal =
3641 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3642 llvm::Value *Cmp =
3643 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3644 ? CGF.Builder.CreateICmpSLT(CVal, N)
3645 : CGF.Builder.CreateICmpULT(CVal, N);
3646 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3647 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3648 // body:
3649 CGF.EmitBlock(BodyBB);
3650 // Iteri = Begini + Counter * Stepi;
3651 CGF.EmitIgnoredExpr(HelperData.Update);
3652     }
3653   }
3654 ~OMPIteratorGeneratorScope() {
3655 if (!E)
3656 return;
3657 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3658 // Counter = Counter + 1;
3659 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3660 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3661 // goto cont;
3662 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3663 // exit:
3664 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3665     }
3666   }
3667 };
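// Illustrative sketch of the control flow generated by this scope for a
// clause modifier such as iterator(i = 0:n):
//   counter = 0;
// cont:
//   if (counter < n) goto body; else goto exit;
// body:
//   i = begin + counter * step;
//   ... element emission for the enclosing clause ...
//   counter = counter + 1;
//   goto cont;
// exit: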
3668 } // namespace
3670 static std::pair<llvm::Value *, llvm::Value *>
3671 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3672 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3673 llvm::Value *Addr;
3674 if (OASE) {
3675 const Expr *Base = OASE->getBase();
3676 Addr = CGF.EmitScalarExpr(Base);
3677 } else {
3678 Addr = CGF.EmitLValue(E).getPointer(CGF);
3679   }
3680 llvm::Value *SizeVal;
3681 QualType Ty = E->getType();
3682 if (OASE) {
3683 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3684 for (const Expr *SE : OASE->getDimensions()) {
3685 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3686 Sz = CGF.EmitScalarConversion(
3687 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3688 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3689     }
3690 } else if (const auto *ASE =
3691 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3692 LValue UpAddrLVal =
3693 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3694 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3695 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3696 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3697 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3698 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3699 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3700 } else {
3701 SizeVal = CGF.getTypeSize(Ty);
3702   }
3703 return std::make_pair(Addr, SizeVal);
3704 }
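// Illustrative examples: for an array-shaping expression ([n][m])p the size
// is sizeof(*p) * n * m; for an array section a[l:len] it is the byte
// distance from &a[l] to one past the last element of the section; for any
// other expression it is simply sizeof(expr).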
3706 /// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
3707 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3708 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3709 if (KmpTaskAffinityInfoTy.isNull()) {
3710 RecordDecl *KmpAffinityInfoRD =
3711 C.buildImplicitRecord("kmp_task_affinity_info_t");
3712 KmpAffinityInfoRD->startDefinition();
3713 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3714 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3715 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3716 KmpAffinityInfoRD->completeDefinition();
3717 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3718   }
3719 }
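// The record built above mirrors the runtime's affinity descriptor; roughly
// (field types as built here; the runtime's flags field uses bitfields):
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags;
//   };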
3721 CGOpenMPRuntime::TaskResultTy
3722 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3723 const OMPExecutableDirective &D,
3724 llvm::Function *TaskFunction, QualType SharedsTy,
3725 Address Shareds, const OMPTaskDataTy &Data) {
3726 ASTContext &C = CGM.getContext();
3727 llvm::SmallVector<PrivateDataTy, 4> Privates;
3728 // Aggregate privates and sort them by the alignment.
3729 const auto *I = Data.PrivateCopies.begin();
3730 for (const Expr *E : Data.PrivateVars) {
3731 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3732 Privates.emplace_back(
3733 C.getDeclAlign(VD),
3734 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3735 /*PrivateElemInit=*/nullptr));
3736 ++I;
3737   }
3738 I = Data.FirstprivateCopies.begin();
3739 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3740 for (const Expr *E : Data.FirstprivateVars) {
3741 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3742 Privates.emplace_back(
3743 C.getDeclAlign(VD),
3744 PrivateHelpersTy(
3745 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3746 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3747 ++I;
3748 ++IElemInitRef;
3749   }
3750 I = Data.LastprivateCopies.begin();
3751 for (const Expr *E : Data.LastprivateVars) {
3752 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3753 Privates.emplace_back(
3754 C.getDeclAlign(VD),
3755 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3756 /*PrivateElemInit=*/nullptr));
3757 ++I;
3758   }
3759 for (const VarDecl *VD : Data.PrivateLocals) {
3760 if (isAllocatableDecl(VD))
3761 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3762 else
3763 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3764   }
3765 llvm::stable_sort(Privates,
3766 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3767 return L.first > R.first;
3768                     });
3769 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3770 // Build type kmp_routine_entry_t (if not built yet).
3771 emitKmpRoutineEntryT(KmpInt32Ty);
3772 // Build type kmp_task_t (if not built yet).
3773 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3774 if (SavedKmpTaskloopTQTy.isNull()) {
3775 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3776 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3777     }
3778 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3779 } else {
3780 assert((D.getDirectiveKind() == OMPD_task ||
3781 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3782 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3783 "Expected taskloop, task or target directive");
3784 if (SavedKmpTaskTQTy.isNull()) {
3785 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3786 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3787     }
3788 KmpTaskTQTy = SavedKmpTaskTQTy;
3789   }
3790 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3791 // Build particular struct kmp_task_t for the given task.
3792 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3793 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3794 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3795 QualType KmpTaskTWithPrivatesPtrQTy =
3796 C.getPointerType(KmpTaskTWithPrivatesQTy);
3797 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3798 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3799 KmpTaskTWithPrivatesTy->getPointerTo();
3800 llvm::Value *KmpTaskTWithPrivatesTySize =
3801 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3802 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3804 // Emit initial values for private copies (if any).
3805 llvm::Value *TaskPrivatesMap = nullptr;
3806 llvm::Type *TaskPrivatesMapTy =
3807 std::next(TaskFunction->arg_begin(), 3)->getType();
3808 if (!Privates.empty()) {
3809 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3810 TaskPrivatesMap =
3811 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3812 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3813 TaskPrivatesMap, TaskPrivatesMapTy);
3814 } else {
3815 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3816 cast<llvm::PointerType>(TaskPrivatesMapTy));
3817   }
3818 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3819 // kmp_task_t *tt);
3820 llvm::Function *TaskEntry = emitProxyTaskFunction(
3821 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3822 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3823 TaskPrivatesMap);
3825 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3826 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3827 // kmp_routine_entry_t *task_entry);
3828 // Task flags. Format is taken from
3829 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3830 // description of kmp_tasking_flags struct.
3831 enum {
3832 TiedFlag = 0x1,
3833 FinalFlag = 0x2,
3834 DestructorsFlag = 0x8,
3835 PriorityFlag = 0x20,
3836 DetachableFlag = 0x40,
3837   };
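// Illustrative example: for
//   #pragma omp task priority(p) detach(evt)
// a tied task gets Flags = TiedFlag | PriorityFlag | DetachableFlag (0x61),
// and DestructorsFlag (0x8) is OR'ed in only when private copies need
// cleanups, as computed below.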
3838 unsigned Flags = Data.Tied ? TiedFlag : 0;
3839 bool NeedsCleanup = false;
3840 if (!Privates.empty()) {
3841 NeedsCleanup =
3842 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3843 if (NeedsCleanup)
3844 Flags = Flags | DestructorsFlag;
3845   }
3846 if (Data.Priority.getInt())
3847 Flags = Flags | PriorityFlag;
3848 if (D.hasClausesOfKind<OMPDetachClause>())
3849 Flags = Flags | DetachableFlag;
3850 llvm::Value *TaskFlags =
3851 Data.Final.getPointer()
3852 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3853 CGF.Builder.getInt32(FinalFlag),
3854 CGF.Builder.getInt32(/*C=*/0))
3855 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3856 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3857 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3858 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3859 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3860 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3861 TaskEntry, KmpRoutineEntryPtrTy)};
3862 llvm::Value *NewTask;
3863 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3864 // Check if we have any device clause associated with the directive.
3865 const Expr *Device = nullptr;
3866 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3867 Device = C->getDevice();
3868     // Emit the device ID if any, otherwise use the default value.
3869 llvm::Value *DeviceID;
3870 if (Device)
3871 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3872 CGF.Int64Ty, /*isSigned=*/true);
3873 else
3874 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3875 AllocArgs.push_back(DeviceID);
3876 NewTask = CGF.EmitRuntimeCall(
3877 OMPBuilder.getOrCreateRuntimeFunction(
3878 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3879 AllocArgs);
3880 } else {
3881 NewTask =
3882 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3883 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3884 AllocArgs);
3885   }
3886 // Emit detach clause initialization.
3887 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3888 // task_descriptor);
3889 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3890 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3891 LValue EvtLVal = CGF.EmitLValue(Evt);
3893 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3894 // int gtid, kmp_task_t *task);
3895 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3896 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3897 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3898 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3899 OMPBuilder.getOrCreateRuntimeFunction(
3900 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3901 {Loc, Tid, NewTask});
3902 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3903 Evt->getExprLoc());
3904 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3905   }
3906 // Process affinity clauses.
3907 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3908 // Process list of affinity data.
3909 ASTContext &C = CGM.getContext();
3910 Address AffinitiesArray = Address::invalid();
3911 // Calculate number of elements to form the array of affinity data.
3912 llvm::Value *NumOfElements = nullptr;
3913 unsigned NumAffinities = 0;
3914 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3915 if (const Expr *Modifier = C->getModifier()) {
3916 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3917 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3918 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3919 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3920 NumOfElements =
3921 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3922         }
3923 } else {
3924 NumAffinities += C->varlist_size();
3925       }
3926     }
3927 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3928     // Field ids in the kmp_task_affinity_info record.
3929 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3931 QualType KmpTaskAffinityInfoArrayTy;
3932 if (NumOfElements) {
3933 NumOfElements = CGF.Builder.CreateNUWAdd(
3934 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3935 auto *OVE = new (C) OpaqueValueExpr(
3936 Loc,
3937 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3938 VK_PRValue);
3939 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3940 RValue::get(NumOfElements));
3941 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3942 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3943 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3944 // Properly emit variable-sized array.
3945 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3946 ImplicitParamDecl::Other);
3947 CGF.EmitVarDecl(*PD);
3948 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3949 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3950 /*isSigned=*/false);
3951 } else {
3952 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3953 KmpTaskAffinityInfoTy,
3954 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3955 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3956 AffinitiesArray =
3957 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3958 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3959 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3960 /*isSigned=*/false);
3961     }
3963 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3964     // Fill the array with elements from clauses that have no iterator modifier.
3965 unsigned Pos = 0;
3966 bool HasIterator = false;
3967 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3968 if (C->getModifier()) {
3969 HasIterator = true;
3970 continue;
3971       }
3972 for (const Expr *E : C->varlists()) {
3973 llvm::Value *Addr;
3974 llvm::Value *Size;
3975 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3976 LValue Base =
3977 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3978 KmpTaskAffinityInfoTy);
3979 // affs[i].base_addr = &<Affinities[i].second>;
3980 LValue BaseAddrLVal = CGF.EmitLValueForField(
3981 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3982 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3983 BaseAddrLVal);
3984 // affs[i].len = sizeof(<Affinities[i].second>);
3985 LValue LenLVal = CGF.EmitLValueForField(
3986 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3987 CGF.EmitStoreOfScalar(Size, LenLVal);
3988 ++Pos;
3989       }
3990     }
3991 LValue PosLVal;
3992 if (HasIterator) {
3993 PosLVal = CGF.MakeAddrLValue(
3994 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3995 C.getSizeType());
3996 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3997     }
3998 // Process elements with iterators.
3999 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4000 const Expr *Modifier = C->getModifier();
4001 if (!Modifier)
4002 continue;
4003 OMPIteratorGeneratorScope IteratorScope(
4004 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4005 for (const Expr *E : C->varlists()) {
4006 llvm::Value *Addr;
4007 llvm::Value *Size;
4008 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4009 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4010 LValue Base = CGF.MakeAddrLValue(
4011 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4012 // affs[i].base_addr = &<Affinities[i].second>;
4013 LValue BaseAddrLVal = CGF.EmitLValueForField(
4014 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4015 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4016 BaseAddrLVal);
4017 // affs[i].len = sizeof(<Affinities[i].second>);
4018 LValue LenLVal = CGF.EmitLValueForField(
4019 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4020 CGF.EmitStoreOfScalar(Size, LenLVal);
4021 Idx = CGF.Builder.CreateNUWAdd(
4022 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4023 CGF.EmitStoreOfScalar(Idx, PosLVal);
4024       }
4025     }
4026 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4027 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4028 // naffins, kmp_task_affinity_info_t *affin_list);
4029 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4030 llvm::Value *GTid = getThreadID(CGF, Loc);
4031 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4032 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4033     // FIXME: Emit the call and ignore its result for now, until the
4034     // runtime function is properly implemented.
4035 (void)CGF.EmitRuntimeCall(
4036 OMPBuilder.getOrCreateRuntimeFunction(
4037 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4038 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4039   }
4040 llvm::Value *NewTaskNewTaskTTy =
4041 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4042 NewTask, KmpTaskTWithPrivatesPtrTy);
4043 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4044 KmpTaskTWithPrivatesQTy);
4045 LValue TDBase =
4046 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4047 // Fill the data in the resulting kmp_task_t record.
4048 // Copy shareds if there are any.
4049 Address KmpTaskSharedsPtr = Address::invalid();
4050 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4051 KmpTaskSharedsPtr = Address(
4052 CGF.EmitLoadOfScalar(
4053 CGF.EmitLValueForField(
4054 TDBase,
4055 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4056 Loc),
4057 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4058 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4059 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4060 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4061   }
4062 // Emit initial values for private copies (if any).
4063 TaskResultTy Result;
4064 if (!Privates.empty()) {
4065 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4066 SharedsTy, SharedsPtrTy, Data, Privates,
4067 /*ForDup=*/false);
4068 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4069 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4070 Result.TaskDupFn = emitTaskDupFunction(
4071 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4072 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4073 /*WithLastIter=*/!Data.LastprivateVars.empty());
4074     }
4075   }
4076 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4077 enum { Priority = 0, Destructors = 1 };
4078 // Provide pointer to function with destructors for privates.
4079 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4080 const RecordDecl *KmpCmplrdataUD =
4081 (*FI)->getType()->getAsUnionType()->getDecl();
4082 if (NeedsCleanup) {
4083 llvm::Value *DestructorFn = emitDestructorsFunction(
4084 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4085 KmpTaskTWithPrivatesQTy);
4086 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4087 LValue DestructorsLV = CGF.EmitLValueForField(
4088 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4089 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4090 DestructorFn, KmpRoutineEntryPtrTy),
4091 DestructorsLV);
4092   }
4093 // Set priority.
4094 if (Data.Priority.getInt()) {
4095 LValue Data2LV = CGF.EmitLValueForField(
4096 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4097 LValue PriorityLV = CGF.EmitLValueForField(
4098 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4099 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4100   }
4101 Result.NewTask = NewTask;
4102 Result.TaskEntry = TaskEntry;
4103 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4104 Result.TDBase = TDBase;
4105 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4106 return Result;
4107 }
4109 /// Translates internal dependency kind into the runtime kind.
4110 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4111 RTLDependenceKindTy DepKind;
4112 switch (K) {
4113 case OMPC_DEPEND_in:
4114 DepKind = RTLDependenceKindTy::DepIn;
4115 break;
4116 // Out and InOut dependencies must use the same code.
4117 case OMPC_DEPEND_out:
4118 case OMPC_DEPEND_inout:
4119 DepKind = RTLDependenceKindTy::DepInOut;
4120 break;
4121 case OMPC_DEPEND_mutexinoutset:
4122 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4123 break;
4124 case OMPC_DEPEND_inoutset:
4125 DepKind = RTLDependenceKindTy::DepInOutSet;
4126 break;
4127 case OMPC_DEPEND_outallmemory:
4128 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4129 break;
4130 case OMPC_DEPEND_source:
4131 case OMPC_DEPEND_sink:
4132 case OMPC_DEPEND_depobj:
4133 case OMPC_DEPEND_inoutallmemory:
4134 case OMPC_DEPEND_unknown:
4135 llvm_unreachable("Unknown task dependence type");
4136   }
4137 return DepKind;
4138 }
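// Illustrative mapping: depend(in: x) -> DepIn; depend(out: x) and
// depend(inout: x) -> DepInOut; depend(mutexinoutset: x) -> DepMutexInOutSet;
// depend(inoutset: x) -> DepInOutSet; depend(out: omp_all_memory) ->
// DepOmpAllMem.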
4140 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4141 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4142 QualType &FlagsTy) {
4143 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4144 if (KmpDependInfoTy.isNull()) {
4145 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4146 KmpDependInfoRD->startDefinition();
4147 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4148 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4149 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4150 KmpDependInfoRD->completeDefinition();
4151 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4152   }
4153 }
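// The record built above mirrors the runtime's dependence descriptor;
// roughly (the flags field is sized to match bool here):
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t len;
//     uint8_t flags;
//   };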
4155 std::pair<llvm::Value *, LValue>
4156 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4157 SourceLocation Loc) {
4158 ASTContext &C = CGM.getContext();
4159 QualType FlagsTy;
4160 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4161 RecordDecl *KmpDependInfoRD =
4162 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4163 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4164 LValue Base = CGF.EmitLoadOfPointerLValue(
4165 DepobjLVal.getAddress(CGF).withElementType(
4166 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4167 KmpDependInfoPtrTy->castAs<PointerType>());
4168 Address DepObjAddr = CGF.Builder.CreateGEP(
4169 Base.getAddress(CGF),
4170 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4171 LValue NumDepsBase = CGF.MakeAddrLValue(
4172 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4173   // NumDeps = deps[-1].base_addr;
4174 LValue BaseAddrLVal = CGF.EmitLValueForField(
4175 NumDepsBase,
4176 *std::next(KmpDependInfoRD->field_begin(),
4177 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4178 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4179 return std::make_pair(NumDeps, Base);
4180 }
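// Layout assumed here: a depobj handle points at element 0 of its
// kmp_depend_info array, and the element at index -1 stores the number of
// dependencies in its base_addr field; that count is what was just loaded.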
4182 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4183 llvm::PointerUnion<unsigned *, LValue *> Pos,
4184 const OMPTaskDataTy::DependData &Data,
4185 Address DependenciesArray) {
4186 CodeGenModule &CGM = CGF.CGM;
4187 ASTContext &C = CGM.getContext();
4188 QualType FlagsTy;
4189 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4190 RecordDecl *KmpDependInfoRD =
4191 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4192 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4194 OMPIteratorGeneratorScope IteratorScope(
4195 CGF, cast_or_null<OMPIteratorExpr>(
4196 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4197 : nullptr));
4198 for (const Expr *E : Data.DepExprs) {
4199 llvm::Value *Addr;
4200 llvm::Value *Size;
4202 // The expression will be a nullptr in the 'omp_all_memory' case.
4203 if (E) {
4204 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4205 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4206 } else {
4207 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4208 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4210 LValue Base;
4211 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4212 Base = CGF.MakeAddrLValue(
4213 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4214 } else {
4215 assert(E && "Expected a non-null expression");
4216 LValue &PosLVal = *Pos.get<LValue *>();
4217 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4218 Base = CGF.MakeAddrLValue(
4219 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4221 // deps[i].base_addr = &<Dependencies[i].second>;
4222 LValue BaseAddrLVal = CGF.EmitLValueForField(
4223 Base,
4224 *std::next(KmpDependInfoRD->field_begin(),
4225 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4226 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4227 // deps[i].len = sizeof(<Dependencies[i].second>);
4228 LValue LenLVal = CGF.EmitLValueForField(
4229 Base, *std::next(KmpDependInfoRD->field_begin(),
4230 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4231 CGF.EmitStoreOfScalar(Size, LenLVal);
4232 // deps[i].flags = <Dependencies[i].first>;
4233 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4234 LValue FlagsLVal = CGF.EmitLValueForField(
4235 Base,
4236 *std::next(KmpDependInfoRD->field_begin(),
4237 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4238 CGF.EmitStoreOfScalar(
4239 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4240 FlagsLVal);
4241 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4242 ++(*P);
4243 } else {
4244 LValue &PosLVal = *Pos.get<LValue *>();
4245 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4246 Idx = CGF.Builder.CreateNUWAdd(Idx,
4247 llvm::ConstantInt::get(Idx->getType(), 1));
4248 CGF.EmitStoreOfScalar(Idx, PosLVal);
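// At the source level, each loop iteration above fills one array slot. With a
// compile-time position the counter is a plain unsigned; with an iterator
// clause it is a runtime counter loaded and stored around the body:
//
//   deps[pos].base_addr = (intptr_t)&<expr>; // 0 for omp_all_memory
//   deps[pos].len       = sizeof(<expr>);    // 0 for omp_all_memory
//   deps[pos].flags     = <translated dependence kind>;
//   ++pos;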
4253 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4254 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4255 const OMPTaskDataTy::DependData &Data) {
4256 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4257 "Expected depobj dependency kind.");
4258 SmallVector<llvm::Value *, 4> Sizes;
4259 SmallVector<LValue, 4> SizeLVals;
4260 ASTContext &C = CGF.getContext();
4262 OMPIteratorGeneratorScope IteratorScope(
4263 CGF, cast_or_null<OMPIteratorExpr>(
4264 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4265 : nullptr));
4266 for (const Expr *E : Data.DepExprs) {
4267 llvm::Value *NumDeps;
4268 LValue Base;
4269 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4270 std::tie(NumDeps, Base) =
4271 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4272 LValue NumLVal = CGF.MakeAddrLValue(
4273 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4274 C.getUIntPtrType());
4275 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4276 NumLVal.getAddress(CGF));
4277 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4278 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4279 CGF.EmitStoreOfScalar(Add, NumLVal);
4280 SizeLVals.push_back(NumLVal);
4283 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4284 llvm::Value *Size =
4285 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4286 Sizes.push_back(Size);
4288 return Sizes;
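// Sketch of what the emitted code computes per depobj expression in the
// clause (a temporary is created for each total so it survives past the
// iterator scope before being loaded into Sizes):
//
//   uintptr_t sz = 0;
//   sz += <entry count read from the depobj's header slot>;
//   // ... after the iterator scope ends:
//   Sizes[i] = sz;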
4291 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4292 QualType &KmpDependInfoTy,
4293 LValue PosLVal,
4294 const OMPTaskDataTy::DependData &Data,
4295 Address DependenciesArray) {
4296 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4297 "Expected depobj dependency kind.");
4298 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4300 OMPIteratorGeneratorScope IteratorScope(
4301 CGF, cast_or_null<OMPIteratorExpr>(
4302 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4303 : nullptr));
4304 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4305 const Expr *E = Data.DepExprs[I];
4306 llvm::Value *NumDeps;
4307 LValue Base;
4308 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4309 std::tie(NumDeps, Base) =
4310 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4312 // Memcpy the dependency data.
4313 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4314 ElSize,
4315 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4316 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4317 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4318 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4320 // Advance pos by the number of copied entries.
4321 // pos += numDeps;
4322 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4323 CGF.EmitStoreOfScalar(Add, PosLVal);
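// Conceptually, the loop above performs a bulk copy of each depobj's payload
// into the combined dependence array:
//
//   for (each depobj d in the clause) {
//     size_t n = <entry count from d's header slot>;
//     memcpy(&deps[pos], d, n * sizeof(kmp_depend_info));
//     pos += n;
//   }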
4328 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4329 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4330 SourceLocation Loc) {
4331 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4332 return D.DepExprs.empty();
4334 return std::make_pair(nullptr, Address::invalid());
4335 // Process list of dependencies.
4336 ASTContext &C = CGM.getContext();
4337 Address DependenciesArray = Address::invalid();
4338 llvm::Value *NumOfElements = nullptr;
4339 unsigned NumDependencies = std::accumulate(
4340 Dependencies.begin(), Dependencies.end(), 0,
4341 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4342 return D.DepKind == OMPC_DEPEND_depobj
4344 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4346 QualType FlagsTy;
4347 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4348 bool HasDepobjDeps = false;
4349 bool HasRegularWithIterators = false;
4350 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4351 llvm::Value *NumOfRegularWithIterators =
4352 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4353 // Calculate the number of depobj dependencies and of regular deps with
4354 // iterators.
4355 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4356 if (D.DepKind == OMPC_DEPEND_depobj) {
4357 SmallVector<llvm::Value *, 4> Sizes =
4358 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4359 for (llvm::Value *Size : Sizes) {
4360 NumOfDepobjElements =
4361 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4363 HasDepobjDeps = true;
4364 continue;
4366 // Include number of iterations, if any.
4368 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4369 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4370 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4371 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4372 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4373 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4374 NumOfRegularWithIterators =
4375 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4377 HasRegularWithIterators = true;
4378 continue;
4382 QualType KmpDependInfoArrayTy;
4383 if (HasDepobjDeps || HasRegularWithIterators) {
4384 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4385 /*isSigned=*/false);
4386 if (HasDepobjDeps) {
4387 NumOfElements =
4388 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4390 if (HasRegularWithIterators) {
4391 NumOfElements =
4392 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4394 auto *OVE = new (C) OpaqueValueExpr(
4395 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4396 VK_PRValue);
4397 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4398 RValue::get(NumOfElements));
4399 KmpDependInfoArrayTy =
4400 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4401 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4402 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4403 // Properly emit variable-sized array.
4404 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4405 ImplicitParamDecl::Other);
4406 CGF.EmitVarDecl(*PD);
4407 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4408 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4409 /*isSigned=*/false);
4410 } else {
4411 KmpDependInfoArrayTy = C.getConstantArrayType(
4412 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4413 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4414 DependenciesArray =
4415 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4416 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4417 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4418 /*isSigned=*/false);
4420 unsigned Pos = 0;
4421 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4422 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4423 Dependencies[I].IteratorExpr)
4424 continue;
4425 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4426 DependenciesArray);
4428 // Copy regular dependencies with iterators.
4429 LValue PosLVal = CGF.MakeAddrLValue(
4430 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4431 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4432 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4433 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4434 !Dependencies[I].IteratorExpr)
4435 continue;
4436 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4437 DependenciesArray);
4439 // Copy final depobj arrays without iterators.
4440 if (HasDepobjDeps) {
4441 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4442 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4443 continue;
4444 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4445 DependenciesArray);
4448 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4449 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4450 return std::make_pair(NumOfElements, DependenciesArray);
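// Summary of the sizing logic above: the number of array elements is
//
//   NumOfElements = <constant deps without iterators>
//                 + sum over iterator clauses of (trip count * #exprs)
//                 + sum over depobj clauses of their stored entry counts
//
// Only the first term is a compile-time constant, so the array becomes a VLA
// whenever depobj or iterator dependencies are present; otherwise a
// fixed-size temporary suffices.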
4453 Address CGOpenMPRuntime::emitDepobjDependClause(
4454 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4455 SourceLocation Loc) {
4456 if (Dependencies.DepExprs.empty())
4457 return Address::invalid();
4458 // Process list of dependencies.
4459 ASTContext &C = CGM.getContext();
4460 Address DependenciesArray = Address::invalid();
4461 unsigned NumDependencies = Dependencies.DepExprs.size();
4462 QualType FlagsTy;
4463 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4464 RecordDecl *KmpDependInfoRD =
4465 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4467 llvm::Value *Size;
4468 // Define type kmp_depend_info[<Dependencies.size()>];
4469 // For depobj reserve one extra element to store the number of elements.
4470 // This is required to handle the depobj(x) update(in) construct.
4471 // kmp_depend_info[<Dependencies.size()>] deps;
4472 llvm::Value *NumDepsVal;
4473 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4474 if (const auto *IE =
4475 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4476 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4477 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4478 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4479 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4480 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4482 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4483 NumDepsVal);
4484 CharUnits SizeInBytes =
4485 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4486 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4487 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4488 NumDepsVal =
4489 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4490 } else {
4491 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4492 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4493 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4494 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4495 Size = CGM.getSize(Sz.alignTo(Align));
4496 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4498 // The array needs to be allocated dynamically.
4499 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4500 // Use default allocator.
4501 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4502 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4504 llvm::Value *Addr =
4505 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4506 CGM.getModule(), OMPRTL___kmpc_alloc),
4507 Args, ".dep.arr.addr");
4508 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4509 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4510 Addr, KmpDependInfoLlvmTy->getPointerTo());
4511 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4512 // Write the number of elements into the first element of the array for depobj.
4513 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4514 // deps[0].base_addr = NumDependencies;
4515 LValue BaseAddrLVal = CGF.EmitLValueForField(
4516 Base,
4517 *std::next(KmpDependInfoRD->field_begin(),
4518 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4519 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4520 llvm::PointerUnion<unsigned *, LValue *> Pos;
4521 unsigned Idx = 1;
4522 LValue PosLVal;
4523 if (Dependencies.IteratorExpr) {
4524 PosLVal = CGF.MakeAddrLValue(
4525 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4526 C.getSizeType());
4527 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4528 /*IsInit=*/true);
4529 Pos = &PosLVal;
4530 } else {
4531 Pos = &Idx;
4533 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4534 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4535 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4536 CGF.Int8Ty);
4537 return DependenciesArray;
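// The allocation produced above, written as C (the extra leading slot is the
// header consumed by getDepobjElements and emitDestroyClause):
//
//   kmp_depend_info *base =
//       __kmpc_alloc(gtid, (ndeps + 1) * sizeof(kmp_depend_info), nullptr);
//   base[0].base_addr = ndeps; // header: number of entries
//   // base[1..ndeps] filled by emitDependData
//   return (void *)&base[1];   // the value stored in the depobj variable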
4540 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4541 SourceLocation Loc) {
4542 ASTContext &C = CGM.getContext();
4543 QualType FlagsTy;
4544 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4545 LValue Base = CGF.EmitLoadOfPointerLValue(
4546 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4547 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4548 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4549 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4550 CGF.ConvertTypeForMem(KmpDependInfoTy));
4551 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4552 Addr.getElementType(), Addr.getPointer(),
4553 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4554 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4555 CGF.VoidPtrTy);
4556 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4557 // Use default allocator.
4558 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4559 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4561 // __kmpc_free(gtid, addr, nullptr);
4562 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4563 CGM.getModule(), OMPRTL___kmpc_free),
4564 Args);
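// I.e., destruction steps back over the header slot before freeing, matching
// the +1 offset produced by emitDepobjDependClause:
//
//   __kmpc_free(gtid, (void *)(<depobj> - 1), /*allocator=*/nullptr);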
4567 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4568 OpenMPDependClauseKind NewDepKind,
4569 SourceLocation Loc) {
4570 ASTContext &C = CGM.getContext();
4571 QualType FlagsTy;
4572 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4573 RecordDecl *KmpDependInfoRD =
4574 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4575 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4576 llvm::Value *NumDeps;
4577 LValue Base;
4578 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4580 Address Begin = Base.getAddress(CGF);
4581 // Compute a pointer to the past-the-end element.
4582 llvm::Value *End = CGF.Builder.CreateGEP(
4583 Begin.getElementType(), Begin.getPointer(), NumDeps);
4584 // The basic structure here is a do-while loop: the body runs at least once.
4585 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4586 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4587 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4588 CGF.EmitBlock(BodyBB);
4589 llvm::PHINode *ElementPHI =
4590 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4591 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4592 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4593 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4594 Base.getTBAAInfo());
4595 // deps[i].flags = NewDepKind;
4596 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4597 LValue FlagsLVal = CGF.EmitLValueForField(
4598 Base, *std::next(KmpDependInfoRD->field_begin(),
4599 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4600 CGF.EmitStoreOfScalar(
4601 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4602 FlagsLVal);
4604 // Shift the address forward by one element.
4605 Address ElementNext =
4606 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4607 ElementPHI->addIncoming(ElementNext.getPointer(),
4608 CGF.Builder.GetInsertBlock());
4609 llvm::Value *IsEmpty =
4610 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4611 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4612 // Done.
4613 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
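// The loop emitted above, written as C; note the body executes before the
// exit test, so a depobj with a single entry still gets updated:
//
//   kmp_depend_info *el = begin;
//   do {
//     el->flags = <translated NewDepKind>;
//     ++el;
//   } while (el != begin + ndeps);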
4616 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4617 const OMPExecutableDirective &D,
4618 llvm::Function *TaskFunction,
4619 QualType SharedsTy, Address Shareds,
4620 const Expr *IfCond,
4621 const OMPTaskDataTy &Data) {
4622 if (!CGF.HaveInsertPoint())
4623 return;
4625 TaskResultTy Result =
4626 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4627 llvm::Value *NewTask = Result.NewTask;
4628 llvm::Function *TaskEntry = Result.TaskEntry;
4629 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4630 LValue TDBase = Result.TDBase;
4631 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4632 // Process list of dependences.
4633 Address DependenciesArray = Address::invalid();
4634 llvm::Value *NumOfElements;
4635 std::tie(NumOfElements, DependenciesArray) =
4636 emitDependClause(CGF, Data.Dependences, Loc);
4638 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4639 // libcall.
4640 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4641 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4642 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4643 // list is not empty
4644 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4645 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4646 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4647 llvm::Value *DepTaskArgs[7];
4648 if (!Data.Dependences.empty()) {
4649 DepTaskArgs[0] = UpLoc;
4650 DepTaskArgs[1] = ThreadID;
4651 DepTaskArgs[2] = NewTask;
4652 DepTaskArgs[3] = NumOfElements;
4653 DepTaskArgs[4] = DependenciesArray.getPointer();
4654 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4655 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4657 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4658 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4659 if (!Data.Tied) {
4660 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4661 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4662 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4664 if (!Data.Dependences.empty()) {
4665 CGF.EmitRuntimeCall(
4666 OMPBuilder.getOrCreateRuntimeFunction(
4667 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4668 DepTaskArgs);
4669 } else {
4670 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4671 CGM.getModule(), OMPRTL___kmpc_omp_task),
4672 TaskArgs);
4674 // Check if the parent region is untied and build a return for the untied task.
4675 if (auto *Region =
4676 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4677 Region->emitUntiedSwitch(CGF);
4680 llvm::Value *DepWaitTaskArgs[7];
4681 if (!Data.Dependences.empty()) {
4682 DepWaitTaskArgs[0] = UpLoc;
4683 DepWaitTaskArgs[1] = ThreadID;
4684 DepWaitTaskArgs[2] = NumOfElements;
4685 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4686 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4687 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4688 DepWaitTaskArgs[6] =
4689 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4691 auto &M = CGM.getModule();
4692 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4693 TaskEntry, &Data, &DepWaitTaskArgs,
4694 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4695 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4696 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4697 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4698 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4699 // is specified.
4700 if (!Data.Dependences.empty())
4701 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4702 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4703 DepWaitTaskArgs);
4704 // Call proxy_task_entry(gtid, new_task);
4705 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4706 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4707 Action.Enter(CGF);
4708 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4709 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4710 OutlinedFnArgs);
4713 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4714 // kmp_task_t *new_task);
4715 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4716 // kmp_task_t *new_task);
4717 RegionCodeGenTy RCG(CodeGen);
4718 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4719 M, OMPRTL___kmpc_omp_task_begin_if0),
4720 TaskArgs,
4721 OMPBuilder.getOrCreateRuntimeFunction(
4722 M, OMPRTL___kmpc_omp_task_complete_if0),
4723 TaskArgs);
4724 RCG.setAction(Action);
4725 RCG(CGF);
4728 if (IfCond) {
4729 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4730 } else {
4731 RegionCodeGenTy ThenRCG(ThenCodeGen);
4732 ThenRCG(CGF);
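// Overall shape of the code emitted for a task with an if clause and
// dependences (a sketch; the runtime entry points are exactly the ones named
// above):
//
//   if (<IfCond>) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, dep_list, 0, 0);
//   } else {
//     __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, dep_list, 0, 0, nowait);
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task); // undeferred, executed inline
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }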
4736 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4737 const OMPLoopDirective &D,
4738 llvm::Function *TaskFunction,
4739 QualType SharedsTy, Address Shareds,
4740 const Expr *IfCond,
4741 const OMPTaskDataTy &Data) {
4742 if (!CGF.HaveInsertPoint())
4743 return;
4744 TaskResultTy Result =
4745 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4746 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4747 // libcall.
4748 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4749 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4750 // sched, kmp_uint64 grainsize, void *task_dup);
4751 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4752 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4753 llvm::Value *IfVal;
4754 if (IfCond) {
4755 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4756 /*isSigned=*/true);
4757 } else {
4758 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4761 LValue LBLVal = CGF.EmitLValueForField(
4762 Result.TDBase,
4763 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4764 const auto *LBVar =
4765 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4766 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4767 LBLVal.getQuals(),
4768 /*IsInitializer=*/true);
4769 LValue UBLVal = CGF.EmitLValueForField(
4770 Result.TDBase,
4771 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4772 const auto *UBVar =
4773 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4774 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4775 UBLVal.getQuals(),
4776 /*IsInitializer=*/true);
4777 LValue StLVal = CGF.EmitLValueForField(
4778 Result.TDBase,
4779 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4780 const auto *StVar =
4781 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4782 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4783 StLVal.getQuals(),
4784 /*IsInitializer=*/true);
4785 // Store the reductions address.
4786 LValue RedLVal = CGF.EmitLValueForField(
4787 Result.TDBase,
4788 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4789 if (Data.Reductions) {
4790 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4791 } else {
4792 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4793 CGF.getContext().VoidPtrTy);
4795 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4796 llvm::Value *TaskArgs[] = {
4797 UpLoc,
4798 ThreadID,
4799 Result.NewTask,
4800 IfVal,
4801 LBLVal.getPointer(CGF),
4802 UBLVal.getPointer(CGF),
4803 CGF.EmitLoadOfScalar(StLVal, Loc),
4804 llvm::ConstantInt::getSigned(
4805 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4806 llvm::ConstantInt::getSigned(
4807 CGF.IntTy, Data.Schedule.getPointer()
4808 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4809 : NoSchedule),
4810 Data.Schedule.getPointer()
4811 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4812 /*isSigned=*/false)
4813 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4814 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4815 Result.TaskDupFn, CGF.VoidPtrTy)
4816 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4817 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4818 CGM.getModule(), OMPRTL___kmpc_taskloop),
4819 TaskArgs);
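// Example of how the schedule argument pair is encoded, per the enum above
// (the clause value is passed separately as the kmp_uint64 argument):
//
//   #pragma omp taskloop grainsize(4)  // -> sched = Grainsize (1), value = 4
//   #pragma omp taskloop num_tasks(8)  // -> sched = NumTasks  (2), value = 8
//   #pragma omp taskloop               // -> sched = NoSchedule(0), value = 0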
4822 /// Emit reduction operation for each element of array (required for
4823 /// array sections) LHS op = RHS.
4824 /// \param Type Type of array.
4825 /// \param LHSVar Variable on the left side of the reduction operation
4826 /// (references element of array in original variable).
4827 /// \param RHSVar Variable on the right side of the reduction operation
4828 /// (references element of array in original variable).
4829 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4830 /// RHSVar.
4831 static void EmitOMPAggregateReduction(
4832 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4833 const VarDecl *RHSVar,
4834 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4835 const Expr *, const Expr *)> &RedOpGen,
4836 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4837 const Expr *UpExpr = nullptr) {
4838 // Perform element-by-element initialization.
4839 QualType ElementTy;
4840 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4841 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4843 // Drill down to the base element type on both arrays.
4844 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4845 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4847 llvm::Value *RHSBegin = RHSAddr.getPointer();
4848 llvm::Value *LHSBegin = LHSAddr.getPointer();
4849 // Compute a pointer to the past-the-end element of the LHS array.
4850 llvm::Value *LHSEnd =
4851 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4852 // The basic structure here is a while loop, guarded by the emptiness check below.
4853 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4854 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4855 llvm::Value *IsEmpty =
4856 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4857 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4859 // Enter the loop body, making that address the current address.
4860 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4861 CGF.EmitBlock(BodyBB);
4863 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4865 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4866 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4867 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4868 Address RHSElementCurrent(
4869 RHSElementPHI, RHSAddr.getElementType(),
4870 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4872 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4873 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4874 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4875 Address LHSElementCurrent(
4876 LHSElementPHI, LHSAddr.getElementType(),
4877 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4879 // Emit copy.
4880 CodeGenFunction::OMPPrivateScope Scope(CGF);
4881 Scope.addPrivate(LHSVar, LHSElementCurrent);
4882 Scope.addPrivate(RHSVar, RHSElementCurrent);
4883 Scope.Privatize();
4884 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4885 Scope.ForceCleanup();
4887 // Shift the address forward by one element.
4888 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4889 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4890 "omp.arraycpy.dest.element");
4891 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4892 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4893 "omp.arraycpy.src.element");
4894 // Check whether we've reached the end.
4895 llvm::Value *Done =
4896 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4897 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4898 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4899 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4901 // Done.
4902 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
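// The element-wise loop above, written as C for a reduction over T lhs[n]
// and T rhs[n] (RedOpGen supplies the actual combiner for each element):
//
//   if (lhs != lhs + n) {
//     T *l = lhs, *r = rhs;
//     do {
//       *l = RedOp(*l, *r);
//       ++l; ++r;
//     } while (l != lhs + n);
//   }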
4905 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4906 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4907 /// UDR combiner function.
4908 static void emitReductionCombiner(CodeGenFunction &CGF,
4909 const Expr *ReductionOp) {
4910 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4911 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4912 if (const auto *DRE =
4913 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4914 if (const auto *DRD =
4915 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4916 std::pair<llvm::Function *, llvm::Function *> Reduction =
4917 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4918 RValue Func = RValue::get(Reduction.first);
4919 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4920 CGF.EmitIgnoredExpr(ReductionOp);
4921 return;
4923 CGF.EmitIgnoredExpr(ReductionOp);
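// The special case above fires for user-defined reductions, e.g. (schematic):
//
//   #pragma omp declare reduction(merge : T : omp_out = combine(omp_out, omp_in))
//
// where ReductionOp is a call whose opaque callee resolves to the
// OMPDeclareReductionDecl; the declared combiner function is substituted for
// the callee before the call is emitted.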
4926 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4927 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4928 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4929 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4930 ASTContext &C = CGM.getContext();
4932 // void reduction_func(void *LHSArg, void *RHSArg);
4933 FunctionArgList Args;
4934 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4935 ImplicitParamDecl::Other);
4936 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4937 ImplicitParamDecl::Other);
4938 Args.push_back(&LHSArg);
4939 Args.push_back(&RHSArg);
4940 const auto &CGFI =
4941 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4942 std::string Name = getReductionFuncName(ReducerName);
4943 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4944 llvm::GlobalValue::InternalLinkage, Name,
4945 &CGM.getModule());
4946 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4947 Fn->setDoesNotRecurse();
4948 CodeGenFunction CGF(CGM);
4949 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4951 // Dst = (void*[n])(LHSArg);
4952 // Src = (void*[n])(RHSArg);
4953 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4954 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4955 ArgsElemType->getPointerTo()),
4956 ArgsElemType, CGF.getPointerAlign());
4957 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4958 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4959 ArgsElemType->getPointerTo()),
4960 ArgsElemType, CGF.getPointerAlign());
4962 // ...
4963 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4964 // ...
4965 CodeGenFunction::OMPPrivateScope Scope(CGF);
4966 const auto *IPriv = Privates.begin();
4967 unsigned Idx = 0;
4968 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4969 const auto *RHSVar =
4970 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4971 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4972 const auto *LHSVar =
4973 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4974 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4975 QualType PrivTy = (*IPriv)->getType();
4976 if (PrivTy->isVariablyModifiedType()) {
4977 // Get array size and emit VLA type.
4978 ++Idx;
4979 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4980 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4981 const VariableArrayType *VLA =
4982 CGF.getContext().getAsVariableArrayType(PrivTy);
4983 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4984 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4985 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4986 CGF.EmitVariablyModifiedType(PrivTy);
4989 Scope.Privatize();
4990 IPriv = Privates.begin();
4991 const auto *ILHS = LHSExprs.begin();
4992 const auto *IRHS = RHSExprs.begin();
4993 for (const Expr *E : ReductionOps) {
4994 if ((*IPriv)->getType()->isArrayType()) {
4995 // Emit reduction for array section.
4996 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4997 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4998 EmitOMPAggregateReduction(
4999 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5000 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5001 emitReductionCombiner(CGF, E);
5003 } else {
5004 // Emit reduction for array subscript or single variable.
5005 emitReductionCombiner(CGF, E);
5007 ++IPriv;
5008 ++ILHS;
5009 ++IRHS;
5011 Scope.ForceCleanup();
5012 CGF.FinishFunction();
5013 return Fn;
5016 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5017 const Expr *ReductionOp,
5018 const Expr *PrivateRef,
5019 const DeclRefExpr *LHS,
5020 const DeclRefExpr *RHS) {
5021 if (PrivateRef->getType()->isArrayType()) {
5022 // Emit reduction for array section.
5023 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5024 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5025 EmitOMPAggregateReduction(
5026 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5027 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5028 emitReductionCombiner(CGF, ReductionOp);
5030 } else {
5031 // Emit reduction for array subscript or single variable.
5032 emitReductionCombiner(CGF, ReductionOp);
5036 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5037 ArrayRef<const Expr *> Privates,
5038 ArrayRef<const Expr *> LHSExprs,
5039 ArrayRef<const Expr *> RHSExprs,
5040 ArrayRef<const Expr *> ReductionOps,
5041 ReductionOptionsTy Options) {
5042 if (!CGF.HaveInsertPoint())
5043 return;
5045 bool WithNowait = Options.WithNowait;
5046 bool SimpleReduction = Options.SimpleReduction;
5048 // The following code should be emitted for the reduction:
5050 // static kmp_critical_name lock = { 0 };
5052 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5053 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5054 // ...
5055 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5056 // *(Type<n>-1*)rhs[<n>-1]);
5057 // }
5059 // ...
5060 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5061 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5062 // RedList, reduce_func, &<lock>)) {
5063 // case 1:
5064 // ...
5065 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5066 // ...
5067 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5068 // break;
5069 // case 2:
5070 // ...
5071 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5072 // ...
5073 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5074 // break;
5075 // default:;
5076 // }
5078 // If SimpleReduction is true, only the following code is generated:
5079 // ...
5080 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5081 // ...
5083 ASTContext &C = CGM.getContext();
5085 if (SimpleReduction) {
5086 CodeGenFunction::RunCleanupsScope Scope(CGF);
5087 const auto *IPriv = Privates.begin();
5088 const auto *ILHS = LHSExprs.begin();
5089 const auto *IRHS = RHSExprs.begin();
5090 for (const Expr *E : ReductionOps) {
5091 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5092 cast<DeclRefExpr>(*IRHS));
5093 ++IPriv;
5094 ++ILHS;
5095 ++IRHS;
5097 return;
5100 // 1. Build a list of reduction variables.
5101 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5102 auto Size = RHSExprs.size();
5103 for (const Expr *E : Privates) {
5104 if (E->getType()->isVariablyModifiedType())
5105 // Reserve a slot for the array size.
5106 ++Size;
5108 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5109 QualType ReductionArrayTy = C.getConstantArrayType(
5110 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5111 /*IndexTypeQuals=*/0);
5112 Address ReductionList =
5113 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5114 const auto *IPriv = Privates.begin();
5115 unsigned Idx = 0;
5116 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5117 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5118 CGF.Builder.CreateStore(
5119 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5120 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5121 Elem);
5122 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5123 // Store array size.
5124 ++Idx;
5125 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5126 llvm::Value *Size = CGF.Builder.CreateIntCast(
5127 CGF.getVLASize(
5128 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5129 .NumElts,
5130 CGF.SizeTy, /*isSigned=*/false);
5131 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5132 Elem);
5136 // 2. Emit reduce_func().
5137 llvm::Function *ReductionFn = emitReductionFunction(
5138 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5139 Privates, LHSExprs, RHSExprs, ReductionOps);
5141 // 3. Create static kmp_critical_name lock = { 0 };
5142 std::string Name = getName({"reduction"});
5143 llvm::Value *Lock = getCriticalRegionLock(Name);
5145 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5146 // RedList, reduce_func, &<lock>);
5147 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5148 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5149 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5150 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5151 ReductionList.getPointer(), CGF.VoidPtrTy);
5152 llvm::Value *Args[] = {
5153 IdentTLoc, // ident_t *<loc>
5154 ThreadId, // i32 <gtid>
5155 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5156 ReductionArrayTySize, // size_type sizeof(RedList)
5157 RL, // void *RedList
5158 ReductionFn, // void (*) (void *, void *) <reduce_func>
5159 Lock // kmp_critical_name *&<lock>
5161 llvm::Value *Res = CGF.EmitRuntimeCall(
5162 OMPBuilder.getOrCreateRuntimeFunction(
5163 CGM.getModule(),
5164 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5165 Args);
5167 // 5. Build switch(res)
5168 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5169 llvm::SwitchInst *SwInst =
5170 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5172 // 6. Build case 1:
5173 // ...
5174 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5175 // ...
5176 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5177 // break;
5178 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5179 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5180 CGF.EmitBlock(Case1BB);
5182 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5183 llvm::Value *EndArgs[] = {
5184 IdentTLoc, // ident_t *<loc>
5185 ThreadId, // i32 <gtid>
5186 Lock // kmp_critical_name *&<lock>
5188 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5189 CodeGenFunction &CGF, PrePostActionTy &Action) {
5190 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5191 const auto *IPriv = Privates.begin();
5192 const auto *ILHS = LHSExprs.begin();
5193 const auto *IRHS = RHSExprs.begin();
5194 for (const Expr *E : ReductionOps) {
5195 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5196 cast<DeclRefExpr>(*IRHS));
5197 ++IPriv;
5198 ++ILHS;
5199 ++IRHS;
5202 RegionCodeGenTy RCG(CodeGen);
5203 CommonActionTy Action(
5204 nullptr, std::nullopt,
5205 OMPBuilder.getOrCreateRuntimeFunction(
5206 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5207 : OMPRTL___kmpc_end_reduce),
5208 EndArgs);
5209 RCG.setAction(Action);
5210 RCG(CGF);
5212 CGF.EmitBranch(DefaultBB);
5214 // 7. Build case 2:
5215 // ...
5216 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5217 // ...
5218 // break;
5219 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5220 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5221 CGF.EmitBlock(Case2BB);
5223 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5224 CodeGenFunction &CGF, PrePostActionTy &Action) {
5225 const auto *ILHS = LHSExprs.begin();
5226 const auto *IRHS = RHSExprs.begin();
5227 const auto *IPriv = Privates.begin();
5228 for (const Expr *E : ReductionOps) {
5229 const Expr *XExpr = nullptr;
5230 const Expr *EExpr = nullptr;
5231 const Expr *UpExpr = nullptr;
5232 BinaryOperatorKind BO = BO_Comma;
5233 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5234 if (BO->getOpcode() == BO_Assign) {
5235 XExpr = BO->getLHS();
5236 UpExpr = BO->getRHS();
5239 // Try to emit update expression as a simple atomic.
5240 const Expr *RHSExpr = UpExpr;
5241 if (RHSExpr) {
5242 // Analyze RHS part of the whole expression.
5243 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5244 RHSExpr->IgnoreParenImpCasts())) {
5245 // If this is a conditional operator, analyze its condition for
5246 // min/max reduction operator.
5247 RHSExpr = ACO->getCond();
5249 if (const auto *BORHS =
5250 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5251 EExpr = BORHS->getRHS();
5252 BO = BORHS->getOpcode();
5255 if (XExpr) {
5256 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5257 auto &&AtomicRedGen = [BO, VD,
5258 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5259 const Expr *EExpr, const Expr *UpExpr) {
5260 LValue X = CGF.EmitLValue(XExpr);
5261 RValue E;
5262 if (EExpr)
5263 E = CGF.EmitAnyExpr(EExpr);
5264 CGF.EmitOMPAtomicSimpleUpdateExpr(
5265 X, E, BO, /*IsXLHSInRHSPart=*/true,
5266 llvm::AtomicOrdering::Monotonic, Loc,
5267 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5268 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5269 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5270 CGF.emitOMPSimpleStore(
5271 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5272 VD->getType().getNonReferenceType(), Loc);
5273 PrivateScope.addPrivate(VD, LHSTemp);
5274 (void)PrivateScope.Privatize();
5275 return CGF.EmitAnyExpr(UpExpr);
5278 if ((*IPriv)->getType()->isArrayType()) {
5279 // Emit atomic reduction for array section.
5280 const auto *RHSVar =
5281 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5282 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5283 AtomicRedGen, XExpr, EExpr, UpExpr);
5284 } else {
5285 // Emit atomic reduction for array subscript or single variable.
5286 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5288 } else {
5289 // Emit as a critical region.
5290 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5291 const Expr *, const Expr *) {
5292 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5293 std::string Name = RT.getName({"atomic_reduction"});
5294 RT.emitCriticalRegion(
5295 CGF, Name,
5296 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5297 Action.Enter(CGF);
5298 emitReductionCombiner(CGF, E);
5300 Loc);
5302 if ((*IPriv)->getType()->isArrayType()) {
5303 const auto *LHSVar =
5304 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5305 const auto *RHSVar =
5306 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5307 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5308 CritRedGen);
5309 } else {
5310 CritRedGen(CGF, nullptr, nullptr, nullptr);
5313 ++ILHS;
5314 ++IRHS;
5315 ++IPriv;
5318 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5319 if (!WithNowait) {
5320 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5321 llvm::Value *EndArgs[] = {
5322 IdentTLoc, // ident_t *<loc>
5323 ThreadId, // i32 <gtid>
5324 Lock // kmp_critical_name *&<lock>
5326 CommonActionTy Action(nullptr, std::nullopt,
5327 OMPBuilder.getOrCreateRuntimeFunction(
5328 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5329 EndArgs);
5330 AtomicRCG.setAction(Action);
5331 AtomicRCG(CGF);
5332 } else {
5333 AtomicRCG(CGF);
5336 CGF.EmitBranch(DefaultBB);
5337 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5340 /// Generates unique name for artificial threadprivate variables.
5341 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5342 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5343 const Expr *Ref) {
5344 SmallString<256> Buffer;
5345 llvm::raw_svector_ostream Out(Buffer);
5346 const clang::DeclRefExpr *DE;
5347 const VarDecl *D = ::getBaseDecl(Ref, DE);
5348 if (!D)
5349 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5350 D = D->getCanonicalDecl();
5351 std::string Name = CGM.getOpenMPRuntime().getName(
5352 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5353 Out << Prefix << Name << "_"
5354 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5355 return std::string(Out.str());
5358 /// Emits reduction initializer function:
5359 /// \code
5360 /// void @.red_init(void* %arg, void* %orig) {
5361 /// %0 = bitcast void* %arg to <type>*
5362 /// store <type> <init>, <type>* %0
5363 /// ret void
5364 /// }
5365 /// \endcode
5366 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5367 SourceLocation Loc,
5368 ReductionCodeGen &RCG, unsigned N) {
5369 ASTContext &C = CGM.getContext();
5370 QualType VoidPtrTy = C.VoidPtrTy;
5371 VoidPtrTy.addRestrict();
5372 FunctionArgList Args;
5373 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5374 ImplicitParamDecl::Other);
5375 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5376 ImplicitParamDecl::Other);
5377 Args.emplace_back(&Param);
5378 Args.emplace_back(&ParamOrig);
5379 const auto &FnInfo =
5380 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5381 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5382 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5383 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5384 Name, &CGM.getModule());
5385 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5386 Fn->setDoesNotRecurse();
5387 CodeGenFunction CGF(CGM);
5388 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5389 QualType PrivateType = RCG.getPrivateType(N);
5390 Address PrivateAddr = CGF.EmitLoadOfPointer(
5391 CGF.GetAddrOfLocalVar(&Param).withElementType(
5392 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5393 C.getPointerType(PrivateType)->castAs<PointerType>());
5394 llvm::Value *Size = nullptr;
5395 // If the size of the reduction item is non-constant, load it from the
5396 // global threadprivate variable.
5397 if (RCG.getSizes(N).second) {
5398 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5399 CGF, CGM.getContext().getSizeType(),
5400 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5401 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5402 CGM.getContext().getSizeType(), Loc);
5404 RCG.emitAggregateType(CGF, N, Size);
5405 Address OrigAddr = Address::invalid();
5406 // If the initializer uses the initializer from the declare reduction
5407 // construct, emit a pointer to the address of the original reduction item
5408 // (required by the reduction initializer).
5409 if (RCG.usesReductionInitializer(N)) {
5410 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5411 OrigAddr = CGF.EmitLoadOfPointer(
5412 SharedAddr,
5413 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5415 // Emit the initializer:
5416 // %0 = bitcast void* %arg to <type>*
5417 // store <type> <init>, <type>* %0
5418 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5419 [](CodeGenFunction &) { return false; });
5420 CGF.FinishFunction();
5421 return Fn;
5424 /// Emits reduction combiner function:
5425 /// \code
5426 /// void @.red_comb(void* %arg0, void* %arg1) {
5427 /// %lhs = bitcast void* %arg0 to <type>*
5428 /// %rhs = bitcast void* %arg1 to <type>*
5429 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5430 /// store <type> %2, <type>* %lhs
5431 /// ret void
5432 /// }
5433 /// \endcode
5434 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5435 SourceLocation Loc,
5436 ReductionCodeGen &RCG, unsigned N,
5437 const Expr *ReductionOp,
5438 const Expr *LHS, const Expr *RHS,
5439 const Expr *PrivateRef) {
5440 ASTContext &C = CGM.getContext();
5441 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5442 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5443 FunctionArgList Args;
5444 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5445 C.VoidPtrTy, ImplicitParamDecl::Other);
5446 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5447 ImplicitParamDecl::Other);
5448 Args.emplace_back(&ParamInOut);
5449 Args.emplace_back(&ParamIn);
5450 const auto &FnInfo =
5451 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5452 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5453 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5454 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5455 Name, &CGM.getModule());
5456 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5457 Fn->setDoesNotRecurse();
5458 CodeGenFunction CGF(CGM);
5459 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5460 llvm::Value *Size = nullptr;
5461 // If the size of the reduction item is non-constant, load it from the
5462 // global threadprivate variable.
5463 if (RCG.getSizes(N).second) {
5464 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5465 CGF, CGM.getContext().getSizeType(),
5466 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5467 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5468 CGM.getContext().getSizeType(), Loc);
5470 RCG.emitAggregateType(CGF, N, Size);
5471 // Remap lhs and rhs variables to the addresses of the function arguments.
5472 // %lhs = bitcast void* %arg0 to <type>*
5473 // %rhs = bitcast void* %arg1 to <type>*
5474 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5475 PrivateScope.addPrivate(
5476 LHSVD,
5477 // Pull out the pointer to the variable.
5478 CGF.EmitLoadOfPointer(
5479 CGF.GetAddrOfLocalVar(&ParamInOut)
5480 .withElementType(
5481 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5482 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5483 PrivateScope.addPrivate(
5484 RHSVD,
5485 // Pull out the pointer to the variable.
5486 CGF.EmitLoadOfPointer(
5487 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5488 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5489 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5490 PrivateScope.Privatize();
5491 // Emit the combiner body:
5492 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5493 // store <type> %2, <type>* %lhs
5494 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5495 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5496 cast<DeclRefExpr>(RHS));
5497 CGF.FinishFunction();
5498 return Fn;
5501 /// Emits reduction finalizer function:
5502 /// \code
5503 /// void @.red_fini(void* %arg) {
5504 /// %0 = bitcast void* %arg to <type>*
5505 /// <destroy>(<type>* %0)
5506 /// ret void
5507 /// }
5508 /// \endcode
5509 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5510 SourceLocation Loc,
5511 ReductionCodeGen &RCG, unsigned N) {
5512 if (!RCG.needCleanups(N))
5513 return nullptr;
5514 ASTContext &C = CGM.getContext();
5515 FunctionArgList Args;
5516 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5517 ImplicitParamDecl::Other);
5518 Args.emplace_back(&Param);
5519 const auto &FnInfo =
5520 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5521 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5522 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5523 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5524 Name, &CGM.getModule());
5525 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5526 Fn->setDoesNotRecurse();
5527 CodeGenFunction CGF(CGM);
5528 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5529 Address PrivateAddr = CGF.EmitLoadOfPointer(
5530 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5531 llvm::Value *Size = nullptr;
5532 // If the size of the reduction item is non-constant, load it from the
5533 // global threadprivate variable.
5534 if (RCG.getSizes(N).second) {
5535 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5536 CGF, CGM.getContext().getSizeType(),
5537 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5538 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5539 CGM.getContext().getSizeType(), Loc);
5541 RCG.emitAggregateType(CGF, N, Size);
5542 // Emit the finalizer body:
5543 // <destroy>(<type>* %0)
5544 RCG.emitCleanups(CGF, N, PrivateAddr);
5545 CGF.FinishFunction(Loc);
5546 return Fn;
5549 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5550 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5551 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5552 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5553 return nullptr;
5555 // Build typedef struct:
5556 // kmp_taskred_input {
5557 // void *reduce_shar; // shared reduction item
5558 // void *reduce_orig; // original reduction item used for initialization
5559 // size_t reduce_size; // size of data item
5560 // void *reduce_init; // data initialization routine
5561 // void *reduce_fini; // data finalization routine
5562 // void *reduce_comb; // data combiner routine
5563 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5564 // } kmp_taskred_input_t;
5565 ASTContext &C = CGM.getContext();
5566 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5567 RD->startDefinition();
5568 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5569 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5570 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5571 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5572 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5573 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5574 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5575 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5576 RD->completeDefinition();
5577 QualType RDType = C.getRecordType(RD);
5578 unsigned Size = Data.ReductionVars.size();
5579 llvm::APInt ArraySize(/*numBits=*/64, Size);
5580 QualType ArrayRDType =
5581 C.getConstantArrayType(RDType, ArraySize, nullptr,
5582 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5583 // kmp_task_red_input_t .rd_input.[Size];
5584 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5585 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5586 Data.ReductionCopies, Data.ReductionOps);
5587 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5588 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5589 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5590 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5591 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5592 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5593 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5594 ".rd_input.gep.");
5595 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5596 // ElemLVal.reduce_shar = &Shareds[Cnt];
5597 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5598 RCG.emitSharedOrigLValue(CGF, Cnt);
5599 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5600 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5601 // ElemLVal.reduce_orig = &Origs[Cnt];
5602 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5603 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5604 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5605 RCG.emitAggregateType(CGF, Cnt);
5606 llvm::Value *SizeValInChars;
5607 llvm::Value *SizeVal;
5608 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5609     // We use delayed creation/initialization for VLAs and array sections. It is
5610     // required because the runtime does not provide a way to pass the sizes of
5611     // VLAs/array sections to the initializer/combiner/finalizer functions.
5612     // Instead, threadprivate global variables are used to store these values,
5613     // and the functions read them from there.
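    // A hedged example of the delayed case: given
    //   void foo(int n) {
    //     int a[n];
    //     #pragma omp taskgroup task_reduction(+: a)
    //     { /* ... */ }
    //   }
    // sizeof(a) is unknown at compile time, so DelayedCreation becomes true
    // below and the dynamic size is stored in an artificial threadprivate
    // variable (named via "reduction_size"; see emitTaskReductionFixups) from
    // which the init/comb/fini functions read it back.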
5614 bool DelayedCreation = !!SizeVal;
5615 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5616 /*isSigned=*/false);
5617 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5618 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5619 // ElemLVal.reduce_init = init;
5620 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5621 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5622 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5623 // ElemLVal.reduce_fini = fini;
5624 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5625 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5626 llvm::Value *FiniAddr =
5627 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5628 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5629 // ElemLVal.reduce_comb = comb;
5630 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5631 llvm::Value *CombAddr = emitReduceCombFunction(
5632 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5633 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5634 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5635     // ElemLVal.flags = DelayedCreation ? 1 : 0;
5636 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5637 if (DelayedCreation) {
5638 CGF.EmitStoreOfScalar(
5639 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5640 FlagsLVal);
5641 } else
5642 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5643                                  FlagsLVal.getType());
5644   }
5645 if (Data.IsReductionWithTaskMod) {
5646 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5647 // is_ws, int num, void *data);
5648 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5649 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5650 CGM.IntTy, /*isSigned=*/true);
5651 llvm::Value *Args[] = {
5652 IdentTLoc, GTid,
5653 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5654 /*isSigned=*/true),
5655 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5656 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5657 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5658 return CGF.EmitRuntimeCall(
5659 OMPBuilder.getOrCreateRuntimeFunction(
5660 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5661         Args);
5662   }
5663 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5664 llvm::Value *Args[] = {
5665 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5666 /*isSigned=*/true),
5667 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5668 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5669 CGM.VoidPtrTy)};
5670 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5671 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5672                              Args);
5673 }
5675 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5676 SourceLocation Loc,
5677 bool IsWorksharingReduction) {
5678   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
5679   // int is_ws);
5680 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5681 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5682 CGM.IntTy, /*isSigned=*/true);
5683 llvm::Value *Args[] = {IdentTLoc, GTid,
5684 llvm::ConstantInt::get(CGM.IntTy,
5685 IsWorksharingReduction ? 1 : 0,
5686 /*isSigned=*/true)};
5687 (void)CGF.EmitRuntimeCall(
5688 OMPBuilder.getOrCreateRuntimeFunction(
5689 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5690       Args);
5691 }
5693 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5694 SourceLocation Loc,
5695 ReductionCodeGen &RCG,
5696 unsigned N) {
5697 auto Sizes = RCG.getSizes(N);
5698   // Emit the threadprivate global variable if the size is non-constant
5699   // (Sizes.second != nullptr).
5700 if (Sizes.second) {
5701 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5702 /*isSigned=*/false);
5703 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5704 CGF, CGM.getContext().getSizeType(),
5705 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5706     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5707   }
5708 }
5710 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5711 SourceLocation Loc,
5712 llvm::Value *ReductionsPtr,
5713 LValue SharedLVal) {
5714 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5715 // *d);
5716 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5717 CGM.IntTy,
5718 /*isSigned=*/true),
5719 ReductionsPtr,
5720 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5721 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5722 return Address(
5723 CGF.EmitRuntimeCall(
5724 OMPBuilder.getOrCreateRuntimeFunction(
5725 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5726 Args),
5727       CGF.Int8Ty, SharedLVal.getAlignment());
5728 }
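// For illustration (schematic): within '#pragma omp task in_reduction(+: x)'
// each use of 'x' is routed through the address returned here, i.e. the
// per-thread copy produced by
//   __kmpc_task_reduction_get_th_data(gtid, <taskgroup data>, &x)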
5730 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5731 const OMPTaskDataTy &Data) {
5732 if (!CGF.HaveInsertPoint())
5733 return;
5735 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5736 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5737 OMPBuilder.createTaskwait(CGF.Builder);
5738 } else {
5739 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5740 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5741 auto &M = CGM.getModule();
5742 Address DependenciesArray = Address::invalid();
5743 llvm::Value *NumOfElements;
5744 std::tie(NumOfElements, DependenciesArray) =
5745 emitDependClause(CGF, Data.Dependences, Loc);
5746 if (!Data.Dependences.empty()) {
5747 llvm::Value *DepWaitTaskArgs[7];
5748 DepWaitTaskArgs[0] = UpLoc;
5749 DepWaitTaskArgs[1] = ThreadID;
5750 DepWaitTaskArgs[2] = NumOfElements;
5751 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5752 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5753 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5754 DepWaitTaskArgs[6] =
5755 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5757 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5759 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5760 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5761 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5762 // kmp_int32 has_no_wait); if dependence info is specified.
5763 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5764 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5765 DepWaitTaskArgs);
5767 } else {
5769 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5770 // global_tid);
5771 llvm::Value *Args[] = {UpLoc, ThreadID};
5772 // Ignore return result until untied tasks are supported.
5773 CGF.EmitRuntimeCall(
5774 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5775           Args);
5776     }
5777   }
5779 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5780     Region->emitUntiedSwitch(CGF);
5781 }
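// Illustrative lowering (a sketch): a bare '#pragma omp taskwait' becomes a
// call to __kmpc_omp_taskwait(loc, gtid), whereas
// '#pragma omp taskwait depend(in: x)' materializes a kmp_depend_info array
// and calls __kmpc_omp_taskwait_deps_51 with it, as emitted above.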
5783 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5784 OpenMPDirectiveKind InnerKind,
5785 const RegionCodeGenTy &CodeGen,
5786 bool HasCancel) {
5787 if (!CGF.HaveInsertPoint())
5788 return;
5789 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5790 InnerKind != OMPD_critical &&
5791 InnerKind != OMPD_master &&
5792 InnerKind != OMPD_masked);
5793   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5794 }
5796 namespace {
5797 enum RTCancelKind {
5798 CancelNoreq = 0,
5799 CancelParallel = 1,
5800 CancelLoop = 2,
5801 CancelSections = 3,
5802   CancelTaskgroup = 4
5803 };
5804 } // anonymous namespace
5806 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5807 RTCancelKind CancelKind = CancelNoreq;
5808 if (CancelRegion == OMPD_parallel)
5809 CancelKind = CancelParallel;
5810 else if (CancelRegion == OMPD_for)
5811 CancelKind = CancelLoop;
5812 else if (CancelRegion == OMPD_sections)
5813 CancelKind = CancelSections;
5814 else {
5815 assert(CancelRegion == OMPD_taskgroup);
5816 CancelKind = CancelTaskgroup;
5817   }
5818   return CancelKind;
5819 }
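// For example, '#pragma omp cancel for' maps to CancelLoop (2) and
// '#pragma omp cancellation point taskgroup' to CancelTaskgroup (4); the
// value becomes the cncl_kind argument of __kmpc_cancel and
// __kmpc_cancellationpoint in the functions below.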
5821 void CGOpenMPRuntime::emitCancellationPointCall(
5822 CodeGenFunction &CGF, SourceLocation Loc,
5823 OpenMPDirectiveKind CancelRegion) {
5824 if (!CGF.HaveInsertPoint())
5825 return;
5826 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5827 // global_tid, kmp_int32 cncl_kind);
5828 if (auto *OMPRegionInfo =
5829 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5830     // For 'cancellation point taskgroup', the task region info may not have a
5831     // cancel; the cancel may instead occur in an adjacent task.
5832 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5833 llvm::Value *Args[] = {
5834 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5835 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5836 // Ignore return result until untied tasks are supported.
5837 llvm::Value *Result = CGF.EmitRuntimeCall(
5838 OMPBuilder.getOrCreateRuntimeFunction(
5839 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5840 Args);
5841 // if (__kmpc_cancellationpoint()) {
5842 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5843 // exit from construct;
5844 // }
5845 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5846 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5847 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5848 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5849 CGF.EmitBlock(ExitBB);
5850 if (CancelRegion == OMPD_parallel)
5851 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5852 // exit from construct;
5853 CodeGenFunction::JumpDest CancelDest =
5854 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5855 CGF.EmitBranchThroughCleanup(CancelDest);
5856       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5857     }
5858   }
5859 }
5861 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5862 const Expr *IfCond,
5863 OpenMPDirectiveKind CancelRegion) {
5864 if (!CGF.HaveInsertPoint())
5865 return;
5866 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5867 // kmp_int32 cncl_kind);
5868 auto &M = CGM.getModule();
5869 if (auto *OMPRegionInfo =
5870 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5871 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5872 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5873 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5874 llvm::Value *Args[] = {
5875 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5876 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5877 // Ignore return result until untied tasks are supported.
5878 llvm::Value *Result = CGF.EmitRuntimeCall(
5879 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5880 // if (__kmpc_cancel()) {
5881 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5882 // exit from construct;
5883 // }
5884 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5885 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5886 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5887 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5888 CGF.EmitBlock(ExitBB);
5889 if (CancelRegion == OMPD_parallel)
5890 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5891 // exit from construct;
5892 CodeGenFunction::JumpDest CancelDest =
5893 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5894 CGF.EmitBranchThroughCleanup(CancelDest);
5895       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5896     };
5897 if (IfCond) {
5898 emitIfClause(CGF, IfCond, ThenGen,
5899 [](CodeGenFunction &, PrePostActionTy &) {});
5900 } else {
5901 RegionCodeGenTy ThenRCG(ThenGen);
5902       ThenRCG(CGF);
5903     }
5904   }
5905 }
5907 namespace {
5908 /// Cleanup action for uses_allocators support.
5909 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5910 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5912 public:
5913 OMPUsesAllocatorsActionTy(
5914 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5915 : Allocators(Allocators) {}
5916 void Enter(CodeGenFunction &CGF) override {
5917 if (!CGF.HaveInsertPoint())
5918 return;
5919 for (const auto &AllocatorData : Allocators) {
5920 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5921           CGF, AllocatorData.first, AllocatorData.second);
5922     }
5923   }
5924 void Exit(CodeGenFunction &CGF) override {
5925 if (!CGF.HaveInsertPoint())
5926 return;
5927 for (const auto &AllocatorData : Allocators) {
5928 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5929                                                         AllocatorData.first);
5930     }
5931   }
5932 };
5933 } // namespace
5935 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5936 const OMPExecutableDirective &D, StringRef ParentName,
5937 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5938 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5939 assert(!ParentName.empty() && "Invalid target entry parent name!");
5940 HasEmittedTargetRegion = true;
5941 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5942 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5943 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5944 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5945 if (!D.AllocatorTraits)
5946 continue;
5947       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5948     }
5949   }
5950 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5951 CodeGen.setAction(UsesAllocatorAction);
5952 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5953                                    IsOffloadEntry, CodeGen);
5954 }
5956 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5957 const Expr *Allocator,
5958 const Expr *AllocatorTraits) {
5959 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5960 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5961 // Use default memspace handle.
5962 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5963 llvm::Value *NumTraits = llvm::ConstantInt::get(
5964 CGF.IntTy, cast<ConstantArrayType>(
5965 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5966 ->getSize()
5967 .getLimitedValue());
5968 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5969 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5970 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5971 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5972 AllocatorTraitsLVal.getBaseInfo(),
5973 AllocatorTraitsLVal.getTBAAInfo());
5974 llvm::Value *Traits = Addr.getPointer();
5976 llvm::Value *AllocatorVal =
5977 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5978 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5979 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5980 // Store to allocator.
5981 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5982 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5983 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5984 AllocatorVal =
5985 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5986 Allocator->getType(), Allocator->getExprLoc());
5987   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5988 }
5990 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5991 const Expr *Allocator) {
5992 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5993 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5994 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5995 llvm::Value *AllocatorVal =
5996 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5997 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5998 CGF.getContext().VoidPtrTy,
5999 Allocator->getExprLoc());
6000 (void)CGF.EmitRuntimeCall(
6001 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6002 OMPRTL___kmpc_destroy_allocator),
6003       {ThreadId, AllocatorVal});
6004 }
6006 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6007 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6008 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
6009 int32_t &MaxTeamsVal) {
6011 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
6012 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6013 /*UpperBoundOnly=*/true);
6015 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6016 for (auto *A : C->getAttrs()) {
6017 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6018 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6019 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6020 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6021 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6022 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6023 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6024 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6025 &AttrMaxThreadsVal);
6026 else
6027 continue;
6029 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
6030 if (AttrMaxThreadsVal > 0)
6031 MaxThreadsVal = MaxThreadsVal > 0
6032 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6033 : AttrMaxThreadsVal;
6034 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
6035 if (AttrMaxBlocksVal > 0)
6036 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6037                                       : AttrMaxBlocksVal;
6038     }
6039   }
6040 }
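// A worked sketch of the clamping above (the attribute values are
// hypothetical): with 'ompx_attribute(__attribute__((launch_bounds(128, 2))))'
// on the directive and a num_threads analysis that produced
// MaxThreadsVal = 256, AttrMaxThreadsVal is 128 and MaxThreadsVal becomes
// min(256, 128) = 128; block counts fold into MinTeamsVal/MaxTeamsVal the
// same way.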
6042 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6043 const OMPExecutableDirective &D, StringRef ParentName,
6044 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6045 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6047 llvm::TargetRegionEntryInfo EntryInfo =
6048 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6050 CodeGenFunction CGF(CGM, true);
6051 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6052 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6053 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6055 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6056 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6057         return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6058       };
6060 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
6061 IsOffloadEntry, OutlinedFn, OutlinedFnID);
6063 if (!OutlinedFn)
6064 return;
6066 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6068 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6069 for (auto *A : C->getAttrs()) {
6070 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6071         CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6072     }
6073   }
6074 }
6076 /// Checks if the expression is constant or does not have non-trivial function
6077 /// calls.
6078 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6079 // We can skip constant expressions.
6080 // We can skip expressions with trivial calls or simple expressions.
6081 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6082 !E->hasNonTrivialCall(Ctx)) &&
6083          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6084 }
6086 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6087 const Stmt *Body) {
6088 const Stmt *Child = Body->IgnoreContainers();
6089 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6090 Child = nullptr;
6091 for (const Stmt *S : C->body()) {
6092 if (const auto *E = dyn_cast<Expr>(S)) {
6093 if (isTrivial(Ctx, E))
6094 continue;
6096 // Some of the statements can be ignored.
6097 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6098 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6099 continue;
6100 // Analyze declarations.
6101 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6102 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6103 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6104 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6105 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6106 isa<UsingDirectiveDecl>(D) ||
6107 isa<OMPDeclareReductionDecl>(D) ||
6108 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6109 return true;
6110 const auto *VD = dyn_cast<VarDecl>(D);
6111 if (!VD)
6112 return false;
6113             return VD->hasGlobalStorage() || !VD->isUsed();
6114           }))
6115 continue;
6117       // Found multiple children - cannot determine a single child.
6118 if (Child)
6119 return nullptr;
6120       Child = S;
6121     }
6122 if (Child)
6123       Child = Child->IgnoreContainers();
6124   }
6125   return Child;
6126 }
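// For illustration: given a captured body such as
//   { ; int unused; #pragma omp teams ... }
// the null statement and the unused local are skipped, so the teams
// directive is returned as the single child; two non-trivial statements
// would make getSingleCompoundChild return nullptr instead.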
6128 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6129 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6130 int32_t &MaxTeamsVal) {
6132 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6133 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6134 "Expected target-based executable directive.");
6135 switch (DirectiveKind) {
6136 case OMPD_target: {
6137 const auto *CS = D.getInnermostCapturedStmt();
6138 const auto *Body =
6139 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6140 const Stmt *ChildStmt =
6141 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6142 if (const auto *NestedDir =
6143 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6144 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6145 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6146 const Expr *NumTeams =
6147 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6148 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6149 if (auto Constant =
6150 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6151 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6152           return NumTeams;
6153         }
6154 MinTeamsVal = MaxTeamsVal = 0;
6155         return nullptr;
6156       }
6157 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6158 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6159 MinTeamsVal = MaxTeamsVal = 1;
6160         return nullptr;
6161       }
6162 MinTeamsVal = MaxTeamsVal = 1;
6163       return nullptr;
6164     }
6165     // A value of -1 signals that there is no teams region to emit.
6166 MinTeamsVal = MaxTeamsVal = -1;
6167     return nullptr;
6168   }
6169 case OMPD_target_teams_loop:
6170 case OMPD_target_teams:
6171 case OMPD_target_teams_distribute:
6172 case OMPD_target_teams_distribute_simd:
6173 case OMPD_target_teams_distribute_parallel_for:
6174 case OMPD_target_teams_distribute_parallel_for_simd: {
6175 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6176 const Expr *NumTeams =
6177 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6178 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6179 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6180 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6181       return NumTeams;
6182     }
6183 MinTeamsVal = MaxTeamsVal = 0;
6184     return nullptr;
6185   }
6186 case OMPD_target_parallel:
6187 case OMPD_target_parallel_for:
6188 case OMPD_target_parallel_for_simd:
6189 case OMPD_target_parallel_loop:
6190 case OMPD_target_simd:
6191 MinTeamsVal = MaxTeamsVal = 1;
6192 return nullptr;
6193 case OMPD_parallel:
6194 case OMPD_for:
6195 case OMPD_parallel_for:
6196 case OMPD_parallel_loop:
6197 case OMPD_parallel_master:
6198 case OMPD_parallel_sections:
6199 case OMPD_for_simd:
6200 case OMPD_parallel_for_simd:
6201 case OMPD_cancel:
6202 case OMPD_cancellation_point:
6203 case OMPD_ordered:
6204 case OMPD_threadprivate:
6205 case OMPD_allocate:
6206 case OMPD_task:
6207 case OMPD_simd:
6208 case OMPD_tile:
6209 case OMPD_unroll:
6210 case OMPD_sections:
6211 case OMPD_section:
6212 case OMPD_single:
6213 case OMPD_master:
6214 case OMPD_critical:
6215 case OMPD_taskyield:
6216 case OMPD_barrier:
6217 case OMPD_taskwait:
6218 case OMPD_taskgroup:
6219 case OMPD_atomic:
6220 case OMPD_flush:
6221 case OMPD_depobj:
6222 case OMPD_scan:
6223 case OMPD_teams:
6224 case OMPD_target_data:
6225 case OMPD_target_exit_data:
6226 case OMPD_target_enter_data:
6227 case OMPD_distribute:
6228 case OMPD_distribute_simd:
6229 case OMPD_distribute_parallel_for:
6230 case OMPD_distribute_parallel_for_simd:
6231 case OMPD_teams_distribute:
6232 case OMPD_teams_distribute_simd:
6233 case OMPD_teams_distribute_parallel_for:
6234 case OMPD_teams_distribute_parallel_for_simd:
6235 case OMPD_target_update:
6236 case OMPD_declare_simd:
6237 case OMPD_declare_variant:
6238 case OMPD_begin_declare_variant:
6239 case OMPD_end_declare_variant:
6240 case OMPD_declare_target:
6241 case OMPD_end_declare_target:
6242 case OMPD_declare_reduction:
6243 case OMPD_declare_mapper:
6244 case OMPD_taskloop:
6245 case OMPD_taskloop_simd:
6246 case OMPD_master_taskloop:
6247 case OMPD_master_taskloop_simd:
6248 case OMPD_parallel_master_taskloop:
6249 case OMPD_parallel_master_taskloop_simd:
6250 case OMPD_requires:
6251 case OMPD_metadirective:
6252 case OMPD_unknown:
6253 break;
6254 default:
6255     break;
6256   }
6257   llvm_unreachable("Unexpected directive kind.");
6258 }
6260 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6261 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6262 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6263 "Clauses associated with the teams directive expected to be emitted "
6264 "only for the host!");
6265 CGBuilderTy &Bld = CGF.Builder;
6266 int32_t MinNT = -1, MaxNT = -1;
6267 const Expr *NumTeams =
6268 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6269 if (NumTeams != nullptr) {
6270 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6272 switch (DirectiveKind) {
6273 case OMPD_target: {
6274 const auto *CS = D.getInnermostCapturedStmt();
6275 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6276 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6277 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6278 /*IgnoreResultAssign*/ true);
6279 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6280                                /*isSigned=*/true);
6281     }
6282 case OMPD_target_teams:
6283 case OMPD_target_teams_distribute:
6284 case OMPD_target_teams_distribute_simd:
6285 case OMPD_target_teams_distribute_parallel_for:
6286 case OMPD_target_teams_distribute_parallel_for_simd: {
6287 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6288 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6289 /*IgnoreResultAssign*/ true);
6290 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6291                                /*isSigned=*/true);
6292     }
6293     default:
6294       break;
6295     }
6296   }
6298   assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6299   return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6300 }
6302 /// Check for a num threads constant value (stored in \p UpperBound), or an
6303 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6304 /// store the condition in \p CondVal. If \p E and \p CondVal are nullptr, no
6305 /// expression evaluation is performed.
6306 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6307 const Expr **E, int32_t &UpperBound,
6308 bool UpperBoundOnly, llvm::Value **CondVal) {
6309 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6310 CGF.getContext(), CS->getCapturedStmt());
6311 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6312 if (!Dir)
6313 return;
6315 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6316     // Handle the if clause. If present, the number of threads is calculated as
6317     // <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
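    // E.g. (a sketch): for '#pragma omp parallel if(c) num_threads(n)' nested
    // in the target region, the thread count is c ? n : 1; when 'c' folds to
    // false at compile time, the code below simply pins UpperBound to 1.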
6318 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6319 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6320 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6321 const OMPIfClause *IfClause = nullptr;
6322 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6323 if (C->getNameModifier() == OMPD_unknown ||
6324 C->getNameModifier() == OMPD_parallel) {
6325 IfClause = C;
6326 break;
6329 if (IfClause) {
6330 const Expr *CondExpr = IfClause->getCondition();
6331 bool Result;
6332 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6333 if (!Result) {
6334 UpperBound = 1;
6335             return;
6336           }
6337 } else {
6338 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6339 if (const auto *PreInit =
6340 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6341 for (const auto *I : PreInit->decls()) {
6342 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6343 CGF.EmitVarDecl(cast<VarDecl>(*I));
6344 } else {
6345 CodeGenFunction::AutoVarEmission Emission =
6346 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6347 CGF.EmitAutoVarCleanups(Emission);
6350 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6355     // Check the value of the num_threads clause only if the if clause was not
6356     // specified or does not evaluate to false.
6357 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6358 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6359 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6360 const auto *NumThreadsClause =
6361 Dir->getSingleClause<OMPNumThreadsClause>();
6362 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6363 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6364 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6365 UpperBound =
6366 UpperBound
6367 ? Constant->getZExtValue()
6368 : std::min(UpperBound,
6369 static_cast<int32_t>(Constant->getZExtValue()));
6370       // If we haven't found an upper bound, remember that we saw a
6371       // thread-limiting clause.
6372 if (UpperBound == -1)
6373 UpperBound = 0;
6374 if (!E)
6375 return;
6376 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6377 if (const auto *PreInit =
6378 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6379 for (const auto *I : PreInit->decls()) {
6380 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6381 CGF.EmitVarDecl(cast<VarDecl>(*I));
6382 } else {
6383 CodeGenFunction::AutoVarEmission Emission =
6384 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6385             CGF.EmitAutoVarCleanups(Emission);
6386           }
6387         }
6388       }
6389       *E = NTExpr;
6390     }
6391     return;
6392   }
6393 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6394     UpperBound = 1;
6395 }
6397 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6398 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6399 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6400 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6401 "Clauses associated with the teams directive expected to be emitted "
6402 "only for the host!");
6403 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6404 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6405 "Expected target-based executable directive.");
6407 const Expr *NT = nullptr;
6408 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6410 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6411 if (E->isIntegerConstantExpr(CGF.getContext())) {
6412 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6413 UpperBound = UpperBound ? Constant->getZExtValue()
6414 : std::min(UpperBound,
6415                                              int32_t(Constant->getZExtValue()));
6416     }
6417     // If we haven't found an upper bound, remember that we saw a
6418     // thread-limiting clause.
6419 if (UpperBound == -1)
6420 UpperBound = 0;
6421 if (EPtr)
6422       *EPtr = E;
6423   };
6425 auto ReturnSequential = [&]() {
6426 UpperBound = 1;
6427     return NT;
6428   };
6430 switch (DirectiveKind) {
6431 case OMPD_target: {
6432 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6433 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6434 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6435 CGF.getContext(), CS->getCapturedStmt());
6436     // TODO: The standard is not clear on how to resolve two thread limit
6437     // clauses; let's pick the teams one if it's present, otherwise the target one.
6438 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6439 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6440 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6441 ThreadLimitClause = TLC;
6442 if (ThreadLimitExpr) {
6443 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6444 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6445 CodeGenFunction::LexicalScope Scope(
6446 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6447 if (const auto *PreInit =
6448 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6449 for (const auto *I : PreInit->decls()) {
6450 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6451 CGF.EmitVarDecl(cast<VarDecl>(*I));
6452 } else {
6453 CodeGenFunction::AutoVarEmission Emission =
6454 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6455                 CGF.EmitAutoVarCleanups(Emission);
6456               }
6457             }
6458           }
6459         }
6460       }
6461     }
6462 if (ThreadLimitClause)
6463 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6464 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6465 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6466 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6467 CS = Dir->getInnermostCapturedStmt();
6468 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6469 CGF.getContext(), CS->getCapturedStmt());
6470         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6471       }
6472 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6473 CS = Dir->getInnermostCapturedStmt();
6474 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6475 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6476 return ReturnSequential();
6478     return NT;
6479   }
6480 case OMPD_target_teams: {
6481 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6482 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6483 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6484       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6485     }
6486 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6487 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6488 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6489 CGF.getContext(), CS->getCapturedStmt());
6490 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6491 if (Dir->getDirectiveKind() == OMPD_distribute) {
6492 CS = Dir->getInnermostCapturedStmt();
6493         getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6494       }
6495     }
6496     return NT;
6497   }
6498 case OMPD_target_teams_distribute:
6499 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6500 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6501 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6502       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6503     }
6504 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6505 UpperBoundOnly, CondVal);
6506 return NT;
6507 case OMPD_target_teams_loop:
6508 case OMPD_target_parallel_loop:
6509 case OMPD_target_parallel:
6510 case OMPD_target_parallel_for:
6511 case OMPD_target_parallel_for_simd:
6512 case OMPD_target_teams_distribute_parallel_for:
6513 case OMPD_target_teams_distribute_parallel_for_simd: {
6514 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6515 const OMPIfClause *IfClause = nullptr;
6516 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6517 if (C->getNameModifier() == OMPD_unknown ||
6518 C->getNameModifier() == OMPD_parallel) {
6519 IfClause = C;
6520           break;
6521         }
6522       }
6523 if (IfClause) {
6524 const Expr *Cond = IfClause->getCondition();
6525 bool Result;
6526 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6527 if (!Result)
6528 return ReturnSequential();
6529 } else {
6530 CodeGenFunction::RunCleanupsScope Scope(CGF);
6531           *CondVal = CGF.EvaluateExprAsBool(Cond);
6532         }
6533       }
6534     }
6535 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6536 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6537 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6538       CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6539     }
6540 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6541 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6542 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6543 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6544       return NumThreadsClause->getNumThreads();
6545     }
6546     return NT;
6547   }
6548 case OMPD_target_teams_distribute_simd:
6549 case OMPD_target_simd:
6550 return ReturnSequential();
6551 default:
6552     break;
6553   }
6554   llvm_unreachable("Unsupported directive kind.");
6555 }
6557 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6558 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6559 llvm::Value *NumThreadsVal = nullptr;
6560 llvm::Value *CondVal = nullptr;
6561 llvm::Value *ThreadLimitVal = nullptr;
6562 const Expr *ThreadLimitExpr = nullptr;
6563 int32_t UpperBound = -1;
6565 const Expr *NT = getNumThreadsExprForTargetDirective(
6566 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6567 &ThreadLimitExpr);
6569   // Thread limit expressions are used below; emit them.
6570 if (ThreadLimitExpr) {
6571 ThreadLimitVal =
6572 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6573 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6574                                                   /*isSigned=*/false);
6575   }
6577   // Generate the num threads expression.
6578 if (UpperBound == 1) {
6579 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6580 } else if (NT) {
6581 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6582 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6583 /*isSigned=*/false);
6584 } else if (ThreadLimitVal) {
6585     // If we do not have a num threads value but a thread limit, replace the
6586     // former with the latter. We have already handled the thread limit expression.
6587 NumThreadsVal = ThreadLimitVal;
6588 ThreadLimitVal = nullptr;
6589 } else {
6590 // Default to "0" which means runtime choice.
6591 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6592     NumThreadsVal = CGF.Builder.getInt32(0);
6593   }
6595   // Handle the if clause. If present, the number of threads is calculated as
6596   // <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6597 if (CondVal) {
6598 CodeGenFunction::RunCleanupsScope Scope(CGF);
6599 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6600                                              CGF.Builder.getInt32(1));
6601   }
6603   // If both the thread limit and num threads expressions were present, take
6604   // their minimum.
6605 if (ThreadLimitVal) {
6606 NumThreadsVal = CGF.Builder.CreateSelect(
6607 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6608         ThreadLimitVal, NumThreadsVal);
6609   }
6611   return NumThreadsVal;
6612 }
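// A worked sketch of the selection above: for
//   #pragma omp target parallel if(c) num_threads(n) thread_limit(t)
// the returned value is effectively min(t, c ? n : 1), and 0 (runtime
// choice) is used when no clause constrains the thread count.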
6614 namespace {
6615 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6617 // Utility to handle information from clauses associated with a given
6618 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6619 // It provides a convenient interface to obtain the information and generate
6620 // code for that information.
6621 class MappableExprsHandler {
6622 public:
6623 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6624 static unsigned getFlagMemberOffset() {
6625 unsigned Offset = 0;
6626 for (uint64_t Remain =
6627 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6628 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6629 !(Remain & 1); Remain = Remain >> 1)
6630 Offset++;
6631     return Offset;
6632   }
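  // For example, assuming OMP_MAP_MEMBER_OF occupies the high 16 bits of the
  // 64-bit flag word (0xffff000000000000), the loop above counts its 48
  // trailing zero bits, so a parent's position I in the argument list is
  // encoded as MEMBER_OF via ((I + 1) << 48).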
6634 /// Class that holds debugging information for a data mapping to be passed to
6635 /// the runtime library.
6636 class MappingExprInfo {
6637 /// The variable declaration used for the data mapping.
6638 const ValueDecl *MapDecl = nullptr;
6639 /// The original expression used in the map clause, or null if there is
6640 /// none.
6641 const Expr *MapExpr = nullptr;
6643 public:
6644 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6645 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6647 const ValueDecl *getMapDecl() const { return MapDecl; }
6648     const Expr *getMapExpr() const { return MapExpr; }
6649   };
6651 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6652 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6653 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6654 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6655 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6656 using MapNonContiguousArrayTy =
6657 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6658 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6659 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6661 /// This structure contains combined information generated for mappable
6662 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6663 /// mappers, and non-contiguous information.
6664 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6665 MapExprsArrayTy Exprs;
6666 MapValueDeclsArrayTy Mappers;
6667 MapValueDeclsArrayTy DevicePtrDecls;
6669 /// Append arrays in \a CurInfo.
6670 void append(MapCombinedInfoTy &CurInfo) {
6671 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6672 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6673 CurInfo.DevicePtrDecls.end());
6674 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6675       llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6676     }
6677   };
6679   /// Map between a struct and its lowest & highest elements which have been
6680   /// mapped.
6681 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6682 /// HE(FieldIndex, Pointer)}
6683 struct StructRangeInfoTy {
6684 MapCombinedInfoTy PreliminaryMapData;
6685 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6686 0, Address::invalid()};
6687 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6688 0, Address::invalid()};
6689 Address Base = Address::invalid();
6690 Address LB = Address::invalid();
6691 bool IsArraySection = false;
6692     bool HasCompleteRecord = false;
6693   };
6695 private:
6696   /// Information for a single map-clause component list, including how a device pointer has to be returned.
6697 struct MapInfo {
6698 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6699 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6700 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6701 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6702 bool ReturnDevicePointer = false;
6703 bool IsImplicit = false;
6704 const ValueDecl *Mapper = nullptr;
6705 const Expr *VarRef = nullptr;
6706 bool ForDeviceAddr = false;
6708 MapInfo() = default;
6709 MapInfo(
6710 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6711 OpenMPMapClauseKind MapType,
6712 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6713 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6714 bool ReturnDevicePointer, bool IsImplicit,
6715 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6716 bool ForDeviceAddr = false)
6717 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6718 MotionModifiers(MotionModifiers),
6719 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6720           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6721   };
6723 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6724 /// member and there is no map information about it, then emission of that
6725 /// entry is deferred until the whole struct has been processed.
6726 struct DeferredDevicePtrEntryTy {
6727 const Expr *IE = nullptr;
6728 const ValueDecl *VD = nullptr;
6729 bool ForDeviceAddr = false;
6731 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6732 bool ForDeviceAddr)
6733         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6734   };
6736   /// The target directive from which the mappable clauses were extracted. It
6737   /// is either an executable directive or a user-defined mapper directive.
6738 llvm::PointerUnion<const OMPExecutableDirective *,
6739 const OMPDeclareMapperDecl *>
6740 CurDir;
6742 /// Function the directive is being generated for.
6743 CodeGenFunction &CGF;
6745 /// Set of all first private variables in the current directive.
6746 /// bool data is set to true if the variable is implicitly marked as
6747 /// firstprivate, false otherwise.
6748 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6750 /// Map between device pointer declarations and their expression components.
6751 /// The key value for declarations in 'this' is null.
6752 llvm::DenseMap<
6753 const ValueDecl *,
6754 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6755 DevPointersMap;
6757 /// Map between device addr declarations and their expression components.
6758 /// The key value for declarations in 'this' is null.
6759 llvm::DenseMap<
6760 const ValueDecl *,
6761 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6762 HasDevAddrsMap;
6764 /// Map between lambda declarations and their map type.
6765 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6767 llvm::Value *getExprTypeSize(const Expr *E) const {
6768 QualType ExprTy = E->getType().getCanonicalType();
6770 // Calculate the size for array shaping expression.
6771 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6772 llvm::Value *Size =
6773 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6774 for (const Expr *SE : OAE->getDimensions()) {
6775 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6776 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6777 CGF.getContext().getSizeType(),
6778 SE->getExprLoc());
6779 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6781       return Size;
6782     }
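    // For instance (schematic): for 'map(([n][m])p)' the loop above computes
    // sizeof(*p) * n * m, with each dimension expression first converted to
    // size_t.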
6784 // Reference types are ignored for mapping purposes.
6785 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6786 ExprTy = RefTy->getPointeeType().getCanonicalType();
6788 // Given that an array section is considered a built-in type, we need to
6789 // do the calculation based on the length of the section instead of relying
6790 // on CGF.getTypeSize(E->getType()).
6791 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6792 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6793 OAE->getBase()->IgnoreParenImpCasts())
6794 .getCanonicalType();
6796       // If there is no length associated with the expression and the lower
6797       // bound is not specified either, that means we are using the whole
6798       // length of the base.
6799 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6800 !OAE->getLowerBound())
6801 return CGF.getTypeSize(BaseTy);
6803 llvm::Value *ElemSize;
6804 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6805 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6806 } else {
6807 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6808 assert(ATy && "Expecting array type if not a pointer type.");
6809         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6810       }
6812 // If we don't have a length at this point, that is because we have an
6813 // array section with a single element.
6814 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6815 return ElemSize;
6817 if (const Expr *LenExpr = OAE->getLength()) {
6818 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6819 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6820 CGF.getContext().getSizeType(),
6821 LenExpr->getExprLoc());
6822         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6823       }
6824 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6825 OAE->getLowerBound() && "expected array_section[lb:].");
6826       // Size = sizeof(base type) - lb * sizeof(element type);
6827 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6828 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6829 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6830 CGF.getContext().getSizeType(),
6831 OAE->getLowerBound()->getExprLoc());
6832 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6833 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6834 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6835 LengthVal = CGF.Builder.CreateSelect(
6836 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6837       return LengthVal;
6838     }
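    // Worked example (schematic): for 'int i[100]' and 'map(i[20:])' this
    // computes 100*sizeof(int) - 20*sizeof(int), clamped at zero, i.e. the 80
    // trailing elements of the array.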
6839     return CGF.getTypeSize(ExprTy);
6840   }
6842 /// Return the corresponding bits for a given map clause modifier. Add
6843 /// a flag marking the map as a pointer if requested. Add a flag marking the
6844 /// map as the first one of a series of maps that relate to the same map
6845 /// expression.
6846 OpenMPOffloadMappingFlags getMapTypeBits(
6847 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6848 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6849 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6850 OpenMPOffloadMappingFlags Bits =
6851 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6852 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6853 switch (MapType) {
6854 case OMPC_MAP_alloc:
6855 case OMPC_MAP_release:
6856       // alloc and release are the default behavior in the runtime library, i.e.,
6857       // if we don't pass any bits, alloc/release is what the runtime is going to
6858       // do. Therefore, we don't need to signal anything for these two type
6859       // modifiers.
6860 break;
6861 case OMPC_MAP_to:
6862 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6863 break;
6864 case OMPC_MAP_from:
6865 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6866 break;
6867 case OMPC_MAP_tofrom:
6868 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6869 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6870 break;
6871 case OMPC_MAP_delete:
6872 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6873 break;
6874 case OMPC_MAP_unknown:
6875 llvm_unreachable("Unexpected map type!");
6877 if (AddPtrFlag)
6878 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6879 if (AddIsTargetParamFlag)
6880 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6881 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6882 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6883 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6884 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6885 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6886 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6887 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6888 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6889 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6890 if (IsNonContiguous)
6891 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6892     return Bits;
6893   }
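  // E.g. 'map(always, close, tofrom: x)' yields OMP_MAP_TO | OMP_MAP_FROM |
  // OMP_MAP_ALWAYS | OMP_MAP_CLOSE, with OMP_MAP_TARGET_PARAM added when the
  // capture is passed as a kernel argument.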
6895   /// Return true if the provided expression is a final array section. A
6896   /// final array section is one whose length can't be proven to be one.
6897 bool isFinalArraySectionExpression(const Expr *E) const {
6898 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6900 // It is not an array section and therefore not a unity-size one.
6901 if (!OASE)
6902 return false;
6905     // An array section with no colon always refers to a single element.
6905 if (OASE->getColonLocFirst().isInvalid())
6906 return false;
6908 const Expr *Length = OASE->getLength();
6910     // If we don't have a length, we have to check if the array has size 1
6911     // for this dimension. Also, we should always expect a length if the
6912     // base type is a pointer.
6913 if (!Length) {
6914 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6915 OASE->getBase()->IgnoreParenImpCasts())
6916 .getCanonicalType();
6917 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6918 return ATy->getSize().getSExtValue() != 1;
6919 // If we don't have a constant dimension length, we have to consider
6920 // the current section as having any size, so it is not necessarily
6921       // unitary. If it happens to be unity size, that's the user's fault.
6922       return true;
6923     }
6925 // Check if the length evaluates to 1.
6926 Expr::EvalResult Result;
6927 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6928       return true; // Can have a size greater than 1.
6930 llvm::APSInt ConstLength = Result.Val.getInt();
6931     return ConstLength.getSExtValue() != 1;
6932   }
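  // For example, 'a[3:1]' has a provable length of one and is not final,
  // while 'a[3:n]' (length known only at run time) or 'a[3:]' over a
  // non-unit dimension is final.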
6934 /// Generate the base pointers, section pointers, sizes, map type bits, and
6935 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6936 /// map type, map or motion modifiers, and expression components.
6937 /// \a IsFirstComponent should be set to true if the provided set of
6938 /// components is the first associated with a capture.
6939 void generateInfoForComponentList(
6940 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6941 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6942 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6943 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
6944 bool IsFirstComponentList, bool IsImplicit,
6945 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6946 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6947 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6948 OverlappedElements = std::nullopt) const {
6949 // The following summarizes what has to be generated for each map and the
6950 // types below. The generated information is expressed in this order:
6951 // base pointer, section pointer, size, flags
6952 // (to add to the ones that come from the map type and modifier).
6954 // double d;
6955 // int i[100];
6956 // float *p;
6957 // int **a = &i;
6959 // struct S1 {
6960 // int i;
6961 // float f[50];
6962 // }
6963 // struct S2 {
6964 // int i;
6965 // float f[50];
6966 // S1 s;
6967 // double *p;
6968 // struct S2 *ps;
6969 // int &ref;
6970 // }
6971 // S2 s;
6972 // S2 *ps;
6974 // map(d)
6975 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6977 // map(i)
6978 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6980 // map(i[1:23])
6981 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6983 // map(p)
6984 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6986 // map(p[1:24])
6987 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6988 // in unified shared memory mode or for local pointers
6989 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6991 // map((*a)[0:3])
6992 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6993 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6995 // map(**a)
6996 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6997 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6999 // map(s)
7000 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7002 // map(s.i)
7003 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7005 // map(s.s.f)
7006 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7008 // map(s.p)
7009 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7011 // map(to: s.p[:22])
7012 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7013 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7014 // &(s.p), &(s.p[0]), 22*sizeof(double),
7015 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7016 // (*) alloc space for struct members, only this is a target parameter
7017 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7018 // optimizes this entry out, same in the examples below)
7019 // (***) map the pointee (map: to)
7021 // map(to: s.ref)
7022 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7023 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7024 // (*) alloc space for struct members, only this is a target parameter
7025 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7026 // optimizes this entry out, same in the examples below)
7027 // (***) map the pointee (map: to)
7029 // map(s.ps)
7030 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7032 // map(from: s.ps->s.i)
7033 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7034 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7035 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7037 // map(to: s.ps->ps)
7038 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7039 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7040 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7042 // map(s.ps->ps->ps)
7043 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7044 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7045 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7046 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7048 // map(to: s.ps->ps->s.f[:22])
7049 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7050 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7051 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7052 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7054 // map(ps)
7055 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7057 // map(ps->i)
7058 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7060 // map(ps->s.f)
7061 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7063 // map(from: ps->p)
7064 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7066 // map(to: ps->p[:22])
7067 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7068 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7069 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7071 // map(ps->ps)
7072 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7074 // map(from: ps->ps->s.i)
7075 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7076 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7077 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7079 // map(from: ps->ps->ps)
7080 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7081 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7082 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7084 // map(ps->ps->ps->ps)
7085 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7086 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7087 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7088 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7090 // map(to: ps->ps->ps->s.f[:22])
7091 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7092 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7093 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7094 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7096 // map(to: s.f[:22]) map(from: s.p[:33])
7097 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7098 //     sizeof(double*) (*), TARGET_PARAM
7099 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7100 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7101 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7102 // (*) allocate contiguous space needed to fit all mapped members even if
7103 //     that means allocating space for members that are not mapped (in this
7104 //     example, s.f[22..49] and s.s are not mapped, yet we must allocate
7105 //     space for them as well because they fall between &s.f[0] and &s.p)
7107 // map(from: s.f[:22]) map(to: ps->p[:33])
7108 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7109 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7110 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7111 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7112 // (*) the struct this entry pertains to is the 2nd element in the list of
7113 // arguments, hence MEMBER_OF(2)
7115 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7116 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7117 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7118 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7119 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7120 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7121 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7122 // (*) the struct this entry pertains to is the 4th element in the list
7123 // of arguments, hence MEMBER_OF(4)
7125 // Track if the map information being generated is the first for a capture.
7126 bool IsCaptureFirstInfo = IsFirstComponentList;
7127 // When the variable is on a declare target link or in a to clause with
7128 // unified memory, a reference is needed to hold the host/device address
7129 // of the variable.
7130 bool RequiresReference = false;
7132 // Scan the components from the base to the complete expression.
7133 auto CI = Components.rbegin();
7134 auto CE = Components.rend();
7135 auto I = CI;
7137 // Track if the map information being generated is the first for a list of
7138 // components.
7139 bool IsExpressionFirstInfo = true;
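    // FirstPointerInComplexData is set further below when the base is a
    // non-local pointer variable (and unified shared memory is not required):
    // the load of the pointer is deferred until we know the component is
    // actually array-like, and happens once a member expression is
    // encountered (or in the trailing else-if after the main loop).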
7140 bool FirstPointerInComplexData = false;
7141 Address BP = Address::invalid();
7142 const Expr *AssocExpr = I->getAssociatedExpression();
7143 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7144 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7145 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7147 if (isa<MemberExpr>(AssocExpr)) {
7148 // The base is the 'this' pointer. The content of the pointer is going
7149 // to be the base of the field being mapped.
7150 BP = CGF.LoadCXXThisAddress();
7151 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7152 (OASE &&
7153 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7154 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7155 } else if (OAShE &&
7156 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7157 BP = Address(
7158 CGF.EmitScalarExpr(OAShE->getBase()),
7159 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7160 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7161 } else {
7162 // The base is the reference to the variable.
7163 // BP = &Var.
7164 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7165 if (const auto *VD =
7166 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7167 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7168 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7169 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7170 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7171 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7172 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7173 RequiresReference = true;
7174 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7179 // If the variable is a pointer and is being dereferenced (i.e. is not
7180 // the last component), the base has to be the pointer itself, not its
7181 // reference. References are ignored for mapping purposes.
7182 QualType Ty =
7183 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7184 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7185 // No need to generate individual map information for the pointer, it
7186 // can be associated with the combined storage if shared memory mode is
7187 // active or the base declaration is not a global variable.
7188 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7189 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7190 !VD || VD->hasLocalStorage())
7191 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7192 else
7193 FirstPointerInComplexData = true;
7194 ++I;
7198 // Track whether a component of the list should be marked as MEMBER_OF some
7199 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7200 // in a component list should be marked as MEMBER_OF; all subsequent entries
7201 // do not belong to the base struct. E.g.
7202 // struct S2 s;
7203 // s.ps->ps->ps->f[:]
7204 // (1) (2) (3) (4)
7205 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7206 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7207 // is the pointee of ps(2), which is not a member of struct s, so it should not
7208 // be marked as such (it is still PTR_AND_OBJ).
7209 // The variable is initialized to false so that PTR_AND_OBJ entries which
7210 // are not struct members are not considered (e.g. array of pointers to
7211 // data).
7212 bool ShouldBeMemberOf = false;
7214 // Variable keeping track of whether or not we have encountered a component
7215 // in the component list which is a member expression. Useful when we have a
7216 // pointer or a final array section, in which case it is the previous
7217 // component in the list which tells us whether we have a member expression.
7218 // E.g. X.f[:]
7219 // While processing the final array section "[:]" it is "f" which tells us
7220 // whether we are dealing with a member of a declared struct.
7221 const MemberExpr *EncounteredME = nullptr;
7223 // Track the total number of dimensions. Start from one for the dummy
7224 // dimension.
7225 uint64_t DimSize = 1;
7227 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7228 bool IsPrevMemberReference = false;
7230 for (; I != CE; ++I) {
7231 // If the current component is a member of a struct (parent struct), mark it.
7232 if (!EncounteredME) {
7233 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7234 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7235 // as MEMBER_OF the parent struct.
7236 if (EncounteredME) {
7237 ShouldBeMemberOf = true;
7238 // Do not emit as a complex pointer if this is actually not an
7239 // array-like expression.
7240 if (FirstPointerInComplexData) {
7241 QualType Ty = std::prev(I)
7242 ->getAssociatedDeclaration()
7243 ->getType()
7244 .getNonReferenceType();
7245 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7246 FirstPointerInComplexData = false;
7251 auto Next = std::next(I);
7253 // We need to generate the addresses and sizes if this is the last
7254 // component, if the component is a pointer, or if it is an array section
7255 // whose length can't be proved to be one. If this is a pointer, it
7256 // becomes the base address for the following components.
7258 // A final array section is one whose length can't be proved to be one.
7259 // If the map item is non-contiguous then we don't treat any array section
7260 // as a final array section.
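      // E.g. for `int a[100]` (with hypothetical index/length variables `i`
      // and `n`), a section like `a[i:1]` has a provable length of one and is
      // not final, while `a[0:n]` is a final array section.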
7261 bool IsFinalArraySection =
7262 !IsNonContiguous &&
7263 isFinalArraySectionExpression(I->getAssociatedExpression());
7265 // If we have a declaration for the mapping, use that; otherwise use
7266 // the base declaration of the map clause.
7267 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7268 ? I->getAssociatedDeclaration()
7269 : BaseDecl;
7270 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7271 : MapExpr;
7273 // Get information on whether the element is a pointer. Array sections
7274 // need special treatment given that they are built-in types.
7276 const auto *OASE =
7277 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7278 const auto *OAShE =
7279 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7280 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7281 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7282 bool IsPointer =
7283 OAShE ||
7284 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7285 .getCanonicalType()
7286 ->isAnyPointerType()) ||
7287 I->getAssociatedExpression()->getType()->isAnyPointerType();
7288 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7289 MapDecl &&
7290 MapDecl->getType()->isLValueReferenceType();
7291 bool IsNonDerefPointer = IsPointer &&
7292 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7293 !IsNonContiguous;
7295 if (OASE)
7296 ++DimSize;
7298 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7299 IsFinalArraySection) {
7300 // If this is not the last component, we expect the pointer to be
7301 // associated with an array expression or member expression.
7302 assert((Next == CE ||
7303 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7304 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7305 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7306 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7307 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7308 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7309 "Unexpected expression");
7311 Address LB = Address::invalid();
7312 Address LowestElem = Address::invalid();
7313 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7314 const MemberExpr *E) {
7315 const Expr *BaseExpr = E->getBase();
7316 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7317 // scalar.
7318 LValue BaseLV;
7319 if (E->isArrow()) {
7320 LValueBaseInfo BaseInfo;
7321 TBAAAccessInfo TBAAInfo;
7322 Address Addr =
7323 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7324 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7325 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7326 } else {
7327 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7329 return BaseLV;
7331 if (OAShE) {
7332 LowestElem = LB =
7333 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7334 CGF.ConvertTypeForMem(
7335 OAShE->getBase()->getType()->getPointeeType()),
7336 CGF.getContext().getTypeAlignInChars(
7337 OAShE->getBase()->getType()));
7338 } else if (IsMemberReference) {
7339 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7340 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7341 LowestElem = CGF.EmitLValueForFieldInitialization(
7342 BaseLVal, cast<FieldDecl>(MapDecl))
7343 .getAddress(CGF);
7344 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7345 .getAddress(CGF);
7346 } else {
7347 LowestElem = LB =
7348 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7349 .getAddress(CGF);
7352 // If this component is a pointer inside the base struct then we don't
7353 // need to create any entry for it - it will be combined with the object
7354 // it is pointing to into a single PTR_AND_OBJ entry.
7355 bool IsMemberPointerOrAddr =
7356 EncounteredME &&
7357 (((IsPointer || ForDeviceAddr) &&
7358 I->getAssociatedExpression() == EncounteredME) ||
7359 (IsPrevMemberReference && !IsPointer) ||
7360 (IsMemberReference && Next != CE &&
7361 !Next->getAssociatedExpression()->getType()->isPointerType()));
7362 if (!OverlappedElements.empty() && Next == CE) {
7363 // Handle base element with the info for overlapped elements.
7364 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7365 assert(!IsPointer &&
7366 "Unexpected base element with the pointer type.");
7367 // Mark the whole struct as the struct that requires allocation on the
7368 // device.
7369 PartialStruct.LowestElem = {0, LowestElem};
7370 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7371 I->getAssociatedExpression()->getType());
7372 Address HB = CGF.Builder.CreateConstGEP(
7373 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7374 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7375 TypeSize.getQuantity() - 1);
7376 PartialStruct.HighestElem = {
7377 std::numeric_limits<decltype(
7378 PartialStruct.HighestElem.first)>::max(),
7379 HB};
7380 PartialStruct.Base = BP;
7381 PartialStruct.LB = LB;
7382 assert(
7383 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7384 "Overlapped elements must be used only once for the variable.");
7385 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7386 // Emit data for non-overlapped data.
7387 OpenMPOffloadMappingFlags Flags =
7388 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7389 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7390 /*AddPtrFlag=*/false,
7391 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7392 llvm::Value *Size = nullptr;
7393 // Do a bitcopy of all non-overlapped structure elements.
7394 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7395 Component : OverlappedElements) {
7396 Address ComponentLB = Address::invalid();
7397 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7398 Component) {
7399 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7400 const auto *FD = dyn_cast<FieldDecl>(VD);
7401 if (FD && FD->getType()->isLValueReferenceType()) {
7402 const auto *ME =
7403 cast<MemberExpr>(MC.getAssociatedExpression());
7404 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7405 ComponentLB =
7406 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7407 .getAddress(CGF);
7408 } else {
7409 ComponentLB =
7410 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7411 .getAddress(CGF);
7413 Size = CGF.Builder.CreatePtrDiff(
7414 CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
7415 break;
7418 assert(Size && "Failed to determine structure size");
7419 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7420 CombinedInfo.BasePointers.push_back(BP.getPointer());
7421 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7422 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7423 CombinedInfo.Pointers.push_back(LB.getPointer());
7424 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7425 Size, CGF.Int64Ty, /*isSigned=*/true));
7426 CombinedInfo.Types.push_back(Flags);
7427 CombinedInfo.Mappers.push_back(nullptr);
7428 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7429 : 1);
7430 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7432 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7433 CombinedInfo.BasePointers.push_back(BP.getPointer());
7434 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7435 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7436 CombinedInfo.Pointers.push_back(LB.getPointer());
7437 Size = CGF.Builder.CreatePtrDiff(
7438 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7439 LB.getPointer());
7440 CombinedInfo.Sizes.push_back(
7441 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7442 CombinedInfo.Types.push_back(Flags);
7443 CombinedInfo.Mappers.push_back(nullptr);
7444 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7445 : 1);
7446 break;
7448 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7449 if (!IsMemberPointerOrAddr ||
7450 (Next == CE && MapType != OMPC_MAP_unknown)) {
7451 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7452 CombinedInfo.BasePointers.push_back(BP.getPointer());
7453 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7454 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7455 CombinedInfo.Pointers.push_back(LB.getPointer());
7456 CombinedInfo.Sizes.push_back(
7457 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7458 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7459 : 1);
7461 // If Mapper is valid, the last component inherits the mapper.
7462 bool HasMapper = Mapper && Next == CE;
7463 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7465 // We need to add a pointer flag for each map that comes from the
7466 // same expression except for the first one. We also need to signal
7467 // this map is the first one that relates to the current capture
7468 // (there is a set of entries for each capture).
7469 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7470 MapType, MapModifiers, MotionModifiers, IsImplicit,
7471 !IsExpressionFirstInfo || RequiresReference ||
7472 FirstPointerInComplexData || IsMemberReference,
7473 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7475 if (!IsExpressionFirstInfo || IsMemberReference) {
7476 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7477 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7478 if (IsPointer || (IsMemberReference && Next != CE))
7479 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7480 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7481 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7482 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7483 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7485 if (ShouldBeMemberOf) {
7486 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7487 // should be later updated with the correct value of MEMBER_OF.
7488 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7489 // From now on, all subsequent PTR_AND_OBJ entries should not be
7490 // marked as MEMBER_OF.
7491 ShouldBeMemberOf = false;
7495 CombinedInfo.Types.push_back(Flags);
7498 // If we have encountered a member expression so far, keep track of the
7499 // mapped member. If the parent is "*this", then the value declaration
7500 // is nullptr.
7501 if (EncounteredME) {
7502 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7503 unsigned FieldIndex = FD->getFieldIndex();
7505 // Update info about the lowest and highest elements for this struct
7506 if (!PartialStruct.Base.isValid()) {
7507 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7508 if (IsFinalArraySection) {
7509 Address HB =
7510 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7511 .getAddress(CGF);
7512 PartialStruct.HighestElem = {FieldIndex, HB};
7513 } else {
7514 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7516 PartialStruct.Base = BP;
7517 PartialStruct.LB = BP;
7518 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7519 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7520 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7521 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7525 // We need to emit a combined struct entry for array sections.
7526 if (IsFinalArraySection || IsNonContiguous)
7527 PartialStruct.IsArraySection = true;
7529 // If we have a final array section, we are done with this expression.
7530 if (IsFinalArraySection)
7531 break;
7533 // The pointer becomes the base for the next element.
7534 if (Next != CE)
7535 BP = IsMemberReference ? LowestElem : LB;
7537 IsExpressionFirstInfo = false;
7538 IsCaptureFirstInfo = false;
7539 FirstPointerInComplexData = false;
7540 IsPrevMemberReference = IsMemberReference;
7541 } else if (FirstPointerInComplexData) {
7542 QualType Ty = Components.rbegin()
7543 ->getAssociatedDeclaration()
7544 ->getType()
7545 .getNonReferenceType();
7546 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7547 FirstPointerInComplexData = false;
7550 // If we ran over the whole component list without encountering a member
7551 // expression, allocate the space for the whole record.
7552 if (!EncounteredME)
7553 PartialStruct.HasCompleteRecord = true;
7555 if (!IsNonContiguous)
7556 return;
7558 const ASTContext &Context = CGF.getContext();
7560 // To support stride in array sections, we need to initialize the first
7561 // dimension size as 1, the first offset as 0, and the first count as 1.
7562 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7563 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7564 MapValuesArrayTy CurStrides;
7565 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7566 uint64_t ElementTypeSize;
7568 // Collect size information for each dimension and get the element size as
7569 // the first stride. For example, for `int arr[10][10]`, the DimSizes
7570 // should be [10, 10] and the first stride is 4 bytes.
7571 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7572 Components) {
7573 const Expr *AssocExpr = Component.getAssociatedExpression();
7574 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7576 if (!OASE)
7577 continue;
7579 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7580 auto *CAT = Context.getAsConstantArrayType(Ty);
7581 auto *VAT = Context.getAsVariableArrayType(Ty);
7583 // We need all the dimension sizes except for the last dimension.
7584 assert((VAT || CAT || &Component == &*Components.begin()) &&
7585 "Should be either ConstantArray or VariableArray if not the "
7586 "first Component");
7588 // Get element size if CurStrides is empty.
7589 if (CurStrides.empty()) {
7590 const Type *ElementType = nullptr;
7591 if (CAT)
7592 ElementType = CAT->getElementType().getTypePtr();
7593 else if (VAT)
7594 ElementType = VAT->getElementType().getTypePtr();
7595 else
7596 assert(&Component == &*Components.begin() &&
7597 "Only expect pointer (non CAT or VAT) when this is the "
7598 "first Component");
7599 // If ElementType is null, then it means the base is a pointer
7600 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7601 // in the next iteration.
7602 if (ElementType) {
7603 // When the base is a pointer, we need to remove one level of
7604 // indirection.
7605 if (&Component != &*Components.begin())
7606 ElementType = ElementType->getPointeeOrArrayElementType();
7607 ElementTypeSize =
7608 Context.getTypeSizeInChars(ElementType).getQuantity();
7609 CurStrides.push_back(
7610 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7613 // Get the dimension value for each dimension except the last, since we
7614 // don't need it.
7615 if (DimSizes.size() < Components.size() - 1) {
7616 if (CAT)
7617 DimSizes.push_back(llvm::ConstantInt::get(
7618 CGF.Int64Ty, CAT->getSize().getZExtValue()));
7619 else if (VAT)
7620 DimSizes.push_back(CGF.Builder.CreateIntCast(
7621 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7622 /*IsSigned=*/false));
7626 // Skip the dummy dimension since we already have its information.
7627 auto *DI = DimSizes.begin() + 1;
7628 // Running product of the dimension sizes.
7629 llvm::Value *DimProd =
7630 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7632 // Collect the info for the non-contiguous case. Note that offset, count,
7633 // and stride are only meaningful for array sections, so we insert a null
7634 // for anything other than an array section.
7635 // Also, the sizes of the offset, count, and stride arrays are not the same
7636 // as those of pointers, base_pointers, sizes, or dims. Instead, they equal
7637 // the number of non-contiguous declarations in the target update to/from
7638 // clause.
7639 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7640 Components) {
7641 const Expr *AssocExpr = Component.getAssociatedExpression();
7643 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7644 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7645 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7646 /*isSigned=*/false);
7647 CurOffsets.push_back(Offset);
7648 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7649 CurStrides.push_back(CurStrides.back());
7650 continue;
7653 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7655 if (!OASE)
7656 continue;
7658 // Offset
7659 const Expr *OffsetExpr = OASE->getLowerBound();
7660 llvm::Value *Offset = nullptr;
7661 if (!OffsetExpr) {
7662 // If offset is absent, then we just set it to zero.
7663 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7664 } else {
7665 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7666 CGF.Int64Ty,
7667 /*isSigned=*/false);
7669 CurOffsets.push_back(Offset);
7671 // Count
7672 const Expr *CountExpr = OASE->getLength();
7673 llvm::Value *Count = nullptr;
7674 if (!CountExpr) {
7675 // In Clang, once a high dimension is an array section, we construct all
7676 // the lower dimension as array section, however, for case like
7677 // arr[0:2][2], Clang construct the inner dimension as an array section
7678 // but it actually is not in an array section form according to spec.
7679 if (!OASE->getColonLocFirst().isValid() &&
7680 !OASE->getColonLocSecond().isValid()) {
7681 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7682 } else {
7683 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7684 // When the length is absent it defaults to ⌈(size −
7685 // lower-bound)/stride⌉, where size is the size of the array
7686 // dimension.
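        // E.g. (illustrative numbers, not from the code below): for
        // `int a[10]` and the section `a[2::3]`, size = 10, lower-bound = 2,
        // stride = 3, so the default length is ceil((10 - 2) / 3) = 3, i.e.
        // elements a[2], a[5] and a[8].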
7687 const Expr *StrideExpr = OASE->getStride();
7688 llvm::Value *Stride =
7689 StrideExpr
7690 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7691 CGF.Int64Ty, /*isSigned=*/false)
7692 : nullptr;
7693 if (Stride)
7694 Count = CGF.Builder.CreateUDiv(
7695 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7696 else
7697 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7699 } else {
7700 Count = CGF.EmitScalarExpr(CountExpr);
7702 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7703 CurCounts.push_back(Count);
7705 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7706 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7707 // Offset Count Stride
7708 // D0 0 1 4 (int) <- dummy dimension
7709 // D1 0 2 8 (2 * (1) * 4)
7710 // D2 1 2 20 (1 * (1 * 5) * 4)
7711 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
7712 const Expr *StrideExpr = OASE->getStride();
7713 llvm::Value *Stride =
7714 StrideExpr
7715 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7716 CGF.Int64Ty, /*isSigned=*/false)
7717 : nullptr;
7718 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7719 if (Stride)
7720 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7721 else
7722 CurStrides.push_back(DimProd);
7723 if (DI != DimSizes.end())
7724 ++DI;
7727 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7728 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7729 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7732 /// Return the adjusted map modifiers if the declaration a capture refers to
7733 /// appears in a first-private clause. This is expected to be used only with
7734 /// directives that start with 'target'.
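 /// E.g. (restating the logic below): a first-private pointer capture yields
 /// TO | PTR_AND_OBJ, any other first-private capture yields PRIVATE | TO,
 /// and captures with no special handling default to TO | FROM.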
7735 OpenMPOffloadMappingFlags
7736 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7737 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7739 // A firstprivate variable captured by reference will use only the
7740 // 'private ptr' and 'map to' flags. Return the right flags if the captured
7741 // declaration is known as first-private in this handler.
7742 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7743 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7744 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7745 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7746 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7747 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7749 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7750 if (I != LambdasMap.end())
7751 // For map(to: lambda): use the user-specified map type.
7752 return getMapTypeBits(
7753 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7754 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7755 /*AddPtrFlag=*/false,
7756 /*AddIsTargetParamFlag=*/false,
7757 /*isNonContiguous=*/false);
7758 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7759 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
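 /// Collect the fields of \a RD, recursing into its non-virtual and virtual
 /// bases, into \a Layout in increasing LLVM field-index order. Empty bases,
 /// bitfields, and zero-size fields are skipped.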
7762 void getPlainLayout(const CXXRecordDecl *RD,
7763 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7764 bool AsBase) const {
7765 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7767 llvm::StructType *St =
7768 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7770 unsigned NumElements = St->getNumElements();
7771 llvm::SmallVector<
7772 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7773 RecordLayout(NumElements);
7775 // Fill bases.
7776 for (const auto &I : RD->bases()) {
7777 if (I.isVirtual())
7778 continue;
7779 const auto *Base = I.getType()->getAsCXXRecordDecl();
7780 // Ignore empty bases.
7781 if (Base->isEmpty() || CGF.getContext()
7782 .getASTRecordLayout(Base)
7783 .getNonVirtualSize()
7784 .isZero())
7785 continue;
7787 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7788 RecordLayout[FieldIndex] = Base;
7790 // Fill in virtual bases.
7791 for (const auto &I : RD->vbases()) {
7792 const auto *Base = I.getType()->getAsCXXRecordDecl();
7793 // Ignore empty bases.
7794 if (Base->isEmpty())
7795 continue;
7796 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7797 if (RecordLayout[FieldIndex])
7798 continue;
7799 RecordLayout[FieldIndex] = Base;
7801 // Fill in all the fields.
7802 assert(!RD->isUnion() && "Unexpected union.");
7803 for (const auto *Field : RD->fields()) {
7804 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7805 // will fill in later.)
7806 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7807 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7808 RecordLayout[FieldIndex] = Field;
7811 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7812 &Data : RecordLayout) {
7813 if (Data.isNull())
7814 continue;
7815 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7816 getPlainLayout(Base, Layout, /*AsBase=*/true);
7817 else
7818 Layout.push_back(Data.get<const FieldDecl *>());
7822 /// Generate all the base pointers, section pointers, sizes, map types, and
7823 /// mappers for the extracted mappable expressions (all included in \a
7824 /// CombinedInfo). Also, for each item that relates with a device pointer, a
7825 /// pair of the relevant declaration and index where it occurs is appended to
7826 /// the device pointers info array.
7827 void generateAllInfoForClauses(
7828 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7829 llvm::OpenMPIRBuilder &OMPBuilder,
7830 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7831 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7832 // We have to process the component lists that relate to the same
7833 // declaration in a single chunk so that we can generate the map flags
7834 // correctly. Therefore, we organize all lists in a map.
7835 enum MapKind { Present, Allocs, Other, Total };
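    // Note that for each declaration the per-kind lists are later walked in
    // enum order, so 'present' entries are emitted first, then 'alloc'
    // entries, then the rest.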
7836 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7837 SmallVector<SmallVector<MapInfo, 8>, 4>>
7838 Info;
7840 // Helper function to fill the information map for the different supported
7841 // clauses.
7842 auto &&InfoGen =
7843 [&Info, &SkipVarSet](
7844 const ValueDecl *D, MapKind Kind,
7845 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7846 OpenMPMapClauseKind MapType,
7847 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7848 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7849 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7850 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7851 if (SkipVarSet.contains(D))
7852 return;
7853 auto It = Info.find(D);
7854 if (It == Info.end())
7855 It = Info
7856 .insert(std::make_pair(
7857 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7858 .first;
7859 It->second[Kind].emplace_back(
7860 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7861 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7864 for (const auto *Cl : Clauses) {
7865 const auto *C = dyn_cast<OMPMapClause>(Cl);
7866 if (!C)
7867 continue;
7868 MapKind Kind = Other;
7869 if (llvm::is_contained(C->getMapTypeModifiers(),
7870 OMPC_MAP_MODIFIER_present))
7871 Kind = Present;
7872 else if (C->getMapType() == OMPC_MAP_alloc)
7873 Kind = Allocs;
7874 const auto *EI = C->getVarRefs().begin();
7875 for (const auto L : C->component_lists()) {
7876 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7877 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7878 C->getMapTypeModifiers(), std::nullopt,
7879 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7881 ++EI;
7884 for (const auto *Cl : Clauses) {
7885 const auto *C = dyn_cast<OMPToClause>(Cl);
7886 if (!C)
7887 continue;
7888 MapKind Kind = Other;
7889 if (llvm::is_contained(C->getMotionModifiers(),
7890 OMPC_MOTION_MODIFIER_present))
7891 Kind = Present;
7892 const auto *EI = C->getVarRefs().begin();
7893 for (const auto L : C->component_lists()) {
7894 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7895 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7896 C->isImplicit(), std::get<2>(L), *EI);
7897 ++EI;
7900 for (const auto *Cl : Clauses) {
7901 const auto *C = dyn_cast<OMPFromClause>(Cl);
7902 if (!C)
7903 continue;
7904 MapKind Kind = Other;
7905 if (llvm::is_contained(C->getMotionModifiers(),
7906 OMPC_MOTION_MODIFIER_present))
7907 Kind = Present;
7908 const auto *EI = C->getVarRefs().begin();
7909 for (const auto L : C->component_lists()) {
7910 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7911 std::nullopt, C->getMotionModifiers(),
7912 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7913 *EI);
7914 ++EI;
7918 // Look at the use_device_ptr and use_device_addr clause information and
7919 // mark the existing map entries as such. If there is no map information
7920 // for an entry in the use_device_ptr or use_device_addr list, we create
7921 // one with map type 'alloc' and a zero-size section. It is the user's
7922 // fault if that was not mapped before. If there is no map information and
7923 // the pointer is a struct member, then we defer the emission of that
7924 // entry until the whole struct has been processed.
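    // Illustrative usage (hypothetical user code, not part of this file):
    //   #pragma omp target data map(tofrom: p[0:n]) use_device_ptr(p)
    //   { /* inside, p holds the corresponding device address */ }
    // The matching map entry is tagged with RETURN_PARAM below so the runtime
    // hands the translated device pointer back to the host code.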
7925 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7926 SmallVector<DeferredDevicePtrEntryTy, 4>>
7927 DeferredInfo;
7928 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7930 auto &&UseDeviceDataCombinedInfoGen =
7931 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7932 CodeGenFunction &CGF, bool IsDevAddr) {
7933 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7934 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7935 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7936 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7937 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7938 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7939 UseDeviceDataCombinedInfo.Sizes.push_back(
7940 llvm::Constant::getNullValue(CGF.Int64Ty));
7941 UseDeviceDataCombinedInfo.Types.push_back(
7942 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7943 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7946 auto &&MapInfoGen =
7947 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7948 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7949 OMPClauseMappableExprCommon::MappableExprComponentListRef
7950 Components,
7951 bool IsImplicit, bool IsDevAddr) {
7952 // We didn't find any match in our map information; generate a zero-size
7953 // array section. If the pointer is a struct member, we defer this
7954 // action until the whole struct has been processed.
7955 if (isa<MemberExpr>(IE)) {
7956 // Insert the pointer into Info to be processed by
7957 // generateInfoForComponentList. Because it is a member pointer
7958 // without a pointee, no entry will be generated for it; therefore,
7959 // we need to generate one after the whole struct has been
7960 // processed. Nonetheless, generateInfoForComponentList must be
7961 // called to take the pointer into account for the calculation of
7962 // the range of the partial struct.
7963 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7964 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7965 nullptr, nullptr, IsDevAddr);
7966 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7967 } else {
7968 llvm::Value *Ptr;
7969 if (IsDevAddr) {
7970 if (IE->isGLValue())
7971 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7972 else
7973 Ptr = CGF.EmitScalarExpr(IE);
7974 } else {
7975 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7977 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7981 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7982 const Expr *IE, bool IsDevAddr) -> bool {
7983 // We potentially have map information for this declaration already.
7984 // Look for the first set of components that refer to it. If found,
7985 // return true.
7986 // If the first component is a member expression, we have to look into
7987 // 'this', which maps to null in the map of map information. Otherwise
7988 // look directly for the information.
7989 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7990 if (It != Info.end()) {
7991 bool Found = false;
7992 for (auto &Data : It->second) {
7993 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7994 return MI.Components.back().getAssociatedDeclaration() == VD;
7996 // If we found a map entry, signal that the pointer has to be
7997 // returned and move on to the next declaration. Exclude cases where
7998 // the base pointer is mapped as array subscript, array section or
7999 // array shaping. The base address is passed as a pointer to base in
8000 // this case and cannot be used as a base for a use_device_ptr list
8001 // item.
8002 if (CI != Data.end()) {
8003 if (IsDevAddr) {
8004 CI->ForDeviceAddr = IsDevAddr;
8005 CI->ReturnDevicePointer = true;
8006 Found = true;
8007 break;
8008 } else {
8009 auto PrevCI = std::next(CI->Components.rbegin());
8010 const auto *VarD = dyn_cast<VarDecl>(VD);
8011 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8012 isa<MemberExpr>(IE) ||
8013 !VD->getType().getNonReferenceType()->isPointerType() ||
8014 PrevCI == CI->Components.rend() ||
8015 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8016 VarD->hasLocalStorage()) {
8017 CI->ForDeviceAddr = IsDevAddr;
8018 CI->ReturnDevicePointer = true;
8019 Found = true;
8020 break;
8025 return Found;
8027 return false;
8030 // Look at the use_device_ptr clause information and mark the existing map
8031 // entries as such. If there is no map information for an entry in the
8032 // use_device_ptr list, we create one with map type 'alloc' and a zero-size
8033 // section. It is the user's fault if that was not mapped before. If there
8034 // is no map information and the pointer is a struct member, then we defer
8035 // the emission of that entry until the whole struct has been processed.
8036 for (const auto *Cl : Clauses) {
8037 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8038 if (!C)
8039 continue;
8040 for (const auto L : C->component_lists()) {
8041 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8042 std::get<1>(L);
8043 assert(!Components.empty() &&
8044 "Not expecting empty list of components!");
8045 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8046 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8047 const Expr *IE = Components.back().getAssociatedExpression();
8048 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8049 continue;
8050 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8051 /*IsDevAddr=*/false);
8055 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8056 for (const auto *Cl : Clauses) {
8057 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8058 if (!C)
8059 continue;
8060 for (const auto L : C->component_lists()) {
8061 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8062 std::get<1>(L);
8063 assert(!std::get<1>(L).empty() &&
8064 "Not expecting empty list of components!");
8065 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8066 if (!Processed.insert(VD).second)
8067 continue;
8068 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8069 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8070 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8071 continue;
8072 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8073 /*IsDevAddr=*/true);
8077 for (const auto &Data : Info) {
8078 StructRangeInfoTy PartialStruct;
8080 // Temporary storage for the generated information.
8080 MapCombinedInfoTy CurInfo;
8081 const Decl *D = Data.first;
8082 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8083 for (const auto &M : Data.second) {
8084 for (const MapInfo &L : M) {
8085 assert(!L.Components.empty() &&
8086 "Not expecting declaration with no component lists.");
8088 // Remember the current base pointer index.
8089 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8090 CurInfo.NonContigInfo.IsNonContiguous =
8091 L.Components.back().isNonContiguous();
8092 generateInfoForComponentList(
8093 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8094 CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8095 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8097 // If this entry relates to a device pointer, set the relevant
8098 // declaration and add the 'return pointer' flag.
8099 if (L.ReturnDevicePointer) {
8100 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8101 "Unexpected number of mapped base pointers.");
8103 const ValueDecl *RelevantVD =
8104 L.Components.back().getAssociatedDeclaration();
8105 assert(RelevantVD &&
8106 "No relevant declaration related with device pointer??");
8108 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8109 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8110 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer;
8111 CurInfo.Types[CurrentBasePointersIdx] |=
8112 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8117 // Append any pending zero-length pointers which are struct members and
8118 // are used with use_device_ptr or use_device_addr.
8119 auto CI = DeferredInfo.find(Data.first);
8120 if (CI != DeferredInfo.end()) {
8121 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8122 llvm::Value *BasePtr;
8123 llvm::Value *Ptr;
8124 if (L.ForDeviceAddr) {
8125 if (L.IE->isGLValue())
8126 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8127 else
8128 Ptr = this->CGF.EmitScalarExpr(L.IE);
8129 BasePtr = Ptr;
8130 // Entry is RETURN_PARAM. Also, set the placeholder value
8131 // MEMBER_OF=FFFF so that the entry is later updated with the
8132 // correct value of MEMBER_OF.
8133 CurInfo.Types.push_back(
8134 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8135 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8136 } else {
8137 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8138 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8139 L.IE->getExprLoc());
8140 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8141 // placeholder value MEMBER_OF=FFFF so that the entry is later
8142 // updated with the correct value of MEMBER_OF.
8143 CurInfo.Types.push_back(
8144 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8145 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8146 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8148 CurInfo.Exprs.push_back(L.VD);
8149 CurInfo.BasePointers.emplace_back(BasePtr);
8150 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8151 CurInfo.DevicePointers.emplace_back(
8152 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8153 CurInfo.Pointers.push_back(Ptr);
8154 CurInfo.Sizes.push_back(
8155 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8156 CurInfo.Mappers.push_back(nullptr);
8159 // If there is an entry in PartialStruct it means we have a struct with
8160 // individual members mapped. Emit an extra combined entry.
8161 if (PartialStruct.Base.isValid()) {
8162 CurInfo.NonContigInfo.Dims.push_back(0);
8163 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
8164 /*IsMapThis*/ !VD, OMPBuilder, VD);
8167 // We need to append the results of this capture to what we already
8168 // have.
8169 CombinedInfo.append(CurInfo);
8171 // Append data for use_device_ptr clauses.
8172 CombinedInfo.append(UseDeviceDataCombinedInfo);
8175 public:
8176 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8177 : CurDir(&Dir), CGF(CGF) {
8178 // Extract firstprivate clause information.
8179 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8180 for (const auto *D : C->varlists())
8181 FirstPrivateDecls.try_emplace(
8182 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8183 // Extract implicit firstprivates from uses_allocators clauses.
8184 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8185 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8186 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8187 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8188 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8189 /*Implicit=*/true);
8190 else if (const auto *VD = dyn_cast<VarDecl>(
8191 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8192 ->getDecl()))
8193 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8196 // Extract device pointer clause information.
8197 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8198 for (auto L : C->component_lists())
8199 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8200 // Extract device addr clause information.
8201 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8202 for (auto L : C->component_lists())
8203 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8204 // Extract map information.
8205 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8206 if (C->getMapType() != OMPC_MAP_to)
8207 continue;
8208 for (auto L : C->component_lists()) {
8209 const ValueDecl *VD = std::get<0>(L);
8210 const auto *RD = VD ? VD->getType()
8211 .getCanonicalType()
8212 .getNonReferenceType()
8213 ->getAsCXXRecordDecl()
8214 : nullptr;
8215 if (RD && RD->isLambda())
8216 LambdasMap.try_emplace(std::get<0>(L), C);
8221 /// Constructor for the declare mapper directive.
8222 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8223 : CurDir(&Dir), CGF(CGF) {}
8225 /// Generate code for the combined entry if we have a partially mapped struct
8226 /// and take care of the mapping flags of the arguments corresponding to
8227 /// individual struct members.
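 /// For instance, in the map(to: s.f[:22]) map(from: s.p[:33]) example in the
 /// generateInfoForComponentList comment above, the combined entry is the
 /// leading allocation entry
 ///   &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + sizeof(double*),
 ///   TARGET_PARAM
 /// emitted in front of the MEMBER_OF(1) entries for the individual members.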
8228 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8229 MapFlagsArrayTy &CurTypes,
8230 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8231 llvm::OpenMPIRBuilder &OMPBuilder,
8232 const ValueDecl *VD = nullptr,
8233 bool NotTargetParams = true) const {
8234 if (CurTypes.size() == 1 &&
8235 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8236 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8237 !PartialStruct.IsArraySection)
8238 return;
8239 Address LBAddr = PartialStruct.LowestElem.second;
8240 Address HBAddr = PartialStruct.HighestElem.second;
8241 if (PartialStruct.HasCompleteRecord) {
8242 LBAddr = PartialStruct.LB;
8243 HBAddr = PartialStruct.LB;
8245 CombinedInfo.Exprs.push_back(VD);
8246 // Base is the base of the struct
8247 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8248 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8249 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8250 // Pointer is the address of the lowest element
8251 llvm::Value *LB = LBAddr.getPointer();
8252 const CXXMethodDecl *MD =
8253 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8254 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8255 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8256 // There should not be a mapper for a combined entry.
8257 if (HasBaseClass) {
8258 // OpenMP 5.2 148:21:
8259 // If the target construct is within a class non-static member function,
8260 // and a variable is an accessible data member of the object for which the
8261 // non-static data member function is invoked, the variable is treated as
8262 // if the this[:1] expression had appeared in a map clause with a map-type
8263 // of tofrom.
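      // Illustrative (hypothetical) example: inside a non-static member
      // function,
      //   #pragma omp target map(x)   // x is a non-static data member
      //   { x = 1; }
      // maps the enclosing object as if this[:1] had appeared in a map
      // clause with a tofrom map-type.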
8264 // Emit this[:1]
8265 CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8266 QualType Ty = MD->getFunctionObjectParameterType();
8267 llvm::Value *Size =
8268 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8269 /*isSigned=*/true);
8270 CombinedInfo.Sizes.push_back(Size);
8271 } else {
8272 CombinedInfo.Pointers.push_back(LB);
8273 // Size is (addr of {highest+1} element) - (addr of lowest element)
8274 llvm::Value *HB = HBAddr.getPointer();
8275 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8276 HBAddr.getElementType(), HB, /*Idx0=*/1);
8277 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8278 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8279 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8280 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8281 /*isSigned=*/false);
8282 CombinedInfo.Sizes.push_back(Size);
8284 CombinedInfo.Mappers.push_back(nullptr);
8285 // The map type is always TARGET_PARAM when we generate info for captures.
8286 CombinedInfo.Types.push_back(
8287 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8288 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8289 // If any element has the present modifier, then make sure the runtime
8290 // doesn't attempt to allocate the struct.
8291 if (CurTypes.end() !=
8292 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8293 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8294 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8296 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8297 // Remove TARGET_PARAM flag from the first element
8298 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8299 // If any element has the ompx_hold modifier, then make sure the runtime
8300 // uses the hold reference count for the struct as a whole so that it won't
8301 // be unmapped by an extra dynamic reference count decrement. Add it to all
8302 // elements as well so the runtime knows which reference count to check
8303 // when determining whether it's time for device-to-host transfers of
8304 // individual elements.
8305 if (CurTypes.end() !=
8306 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8307 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8308 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8309 })) {
8310 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8311 for (auto &M : CurTypes)
8312 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8315 // All other current entries will be MEMBER_OF the combined entry
8316 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8317 // 0xFFFF in the MEMBER_OF field).
8318 OpenMPOffloadMappingFlags MemberOfFlag =
8319 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8320 for (auto &M : CurTypes)
8321 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8324 /// Generate all the base pointers, section pointers, sizes, map types, and
8325 /// mappers for the extracted mappable expressions (all included in \a
8326 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8327 /// pair of the relevant declaration and index where it occurs is appended to
8328 /// the device pointers info array.
8329 void generateAllInfo(
8330 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8331 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8332 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8333 assert(CurDir.is<const OMPExecutableDirective *>() &&
8334 "Expect a executable directive");
8335 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8336 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8337 SkipVarSet);
8340 /// Generate all the base pointers, section pointers, sizes, map types, and
8341 /// mappers for the extracted map clauses of user-defined mapper (all included
8342 /// in \a CombinedInfo).
8343 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8344 llvm::OpenMPIRBuilder &OMPBuilder) const {
8345 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8346 "Expect a declare mapper directive");
8347 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8348 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8349 OMPBuilder);
8352 /// Emit capture info for lambdas for variables captured by reference.
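 /// Illustrative (hypothetical) example:
 ///   int x; auto l = [&x]() { return x; };
 ///   #pragma omp target map(to: l)
 /// Besides the lambda object itself, an implicit PTR_AND_OBJ | MEMBER_OF
 /// entry is emitted for the captured reference so that, on the device, the
 /// capture field points at the device instance of x.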
8353 void generateInfoForLambdaCaptures(
8354 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8355 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8356 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8357 const auto *RD = VDType->getAsCXXRecordDecl();
8358 if (!RD || !RD->isLambda())
8359 return;
8360 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8361 CGF.getContext().getDeclAlign(VD));
8362 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8363 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8364 FieldDecl *ThisCapture = nullptr;
8365 RD->getCaptureFields(Captures, ThisCapture);
8366 if (ThisCapture) {
8367 LValue ThisLVal =
8368 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8369 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8370 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8371 VDLVal.getPointer(CGF));
8372 CombinedInfo.Exprs.push_back(VD);
8373 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8374 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8375 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8376 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8377 CombinedInfo.Sizes.push_back(
8378 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8379 CGF.Int64Ty, /*isSigned=*/true));
8380 CombinedInfo.Types.push_back(
8381 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8382 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8383 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8384 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8385 CombinedInfo.Mappers.push_back(nullptr);
8386 }
8387 for (const LambdaCapture &LC : RD->captures()) {
8388 if (!LC.capturesVariable())
8389 continue;
8390 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8391 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8392 continue;
8393 auto It = Captures.find(VD);
8394 assert(It != Captures.end() && "Found lambda capture without field.");
8395 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8396 if (LC.getCaptureKind() == LCK_ByRef) {
8397 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8398 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8399 VDLVal.getPointer(CGF));
8400 CombinedInfo.Exprs.push_back(VD);
8401 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8402 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8403 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8404 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8405 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8406 CGF.getTypeSize(
8407 VD->getType().getCanonicalType().getNonReferenceType()),
8408 CGF.Int64Ty, /*isSigned=*/true));
8409 } else {
8410 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8411 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8412 VDLVal.getPointer(CGF));
8413 CombinedInfo.Exprs.push_back(VD);
8414 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8415 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8416 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8417 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8418 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8419 }
8420 CombinedInfo.Types.push_back(
8421 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8422 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8423 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8424 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8425 CombinedInfo.Mappers.push_back(nullptr);
8426 }
8427 }
8429 /// Set correct indices for lambda captures.
8430 void adjustMemberOfForLambdaCaptures(
8431 llvm::OpenMPIRBuilder &OMPBuilder,
8432 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8433 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8434 MapFlagsArrayTy &Types) const {
8435 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8436 // Set correct member_of idx for all implicit lambda captures.
8437 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8438 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8439 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8440 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8441 continue;
8442 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8443 assert(BasePtr && "Unable to find base lambda address.");
8444 int TgtIdx = -1;
8445 for (unsigned J = I; J > 0; --J) {
8446 unsigned Idx = J - 1;
8447 if (Pointers[Idx] != BasePtr)
8448 continue;
8449 TgtIdx = Idx;
8450 break;
8451 }
8452 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8453 // All other current entries will be MEMBER_OF the combined entry
8454 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8455 // 0xFFFF in the MEMBER_OF field).
8456 OpenMPOffloadMappingFlags MemberOfFlag =
8457 OMPBuilder.getMemberOfFlag(TgtIdx);
8458 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8459 }
8460 }
8462 /// Generate the base pointers, section pointers, sizes, map types, and
8463 /// mappers associated to a given capture (all included in \a CombinedInfo).
8464 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8465 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8466 StructRangeInfoTy &PartialStruct) const {
8467 assert(!Cap->capturesVariableArrayType() &&
8468 "Not expecting to generate map info for a variable array type!");
8470 // We need to know when we are generating information for the first component.
8471 const ValueDecl *VD = Cap->capturesThis()
8472 ? nullptr
8473 : Cap->getCapturedVar()->getCanonicalDecl();
8475 // For map(to: lambda): skip it here; it is processed in
8476 // generateDefaultMapInfo.
8477 if (LambdasMap.count(VD))
8478 return;
8480 // If this declaration appears in an is_device_ptr clause, we just have to
8481 // pass the pointer by value. If it is a reference to a declaration, we just
8482 // pass its value.
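// For instance (illustrative user code):
//   int *p = (int *)omp_target_alloc(N * sizeof(int), dev);
//   #pragma omp target is_device_ptr(p)
//   p[0] = 42;
// Here 'p' already holds a device address, so it is forwarded to the kernel
// by value and no mapping is performed for it.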
8483 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8484 CombinedInfo.Exprs.push_back(VD);
8485 CombinedInfo.BasePointers.emplace_back(Arg);
8486 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8487 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8488 CombinedInfo.Pointers.push_back(Arg);
8489 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8490 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8491 /*isSigned=*/true));
8492 CombinedInfo.Types.push_back(
8493 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8494 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8495 CombinedInfo.Mappers.push_back(nullptr);
8496 return;
8497 }
8499 using MapData =
8500 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8501 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8502 const ValueDecl *, const Expr *>;
8503 SmallVector<MapData, 4> DeclComponentLists;
8504 // For member fields list in is_device_ptr, store it in
8505 // DeclComponentLists for generating components info.
8506 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8507 auto It = DevPointersMap.find(VD);
8508 if (It != DevPointersMap.end())
8509 for (const auto &MCL : It->second)
8510 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8511 /*IsImplicit=*/true, nullptr,
8512 nullptr);
8513 auto I = HasDevAddrsMap.find(VD);
8514 if (I != HasDevAddrsMap.end())
8515 for (const auto &MCL : I->second)
8516 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8517 /*IsImplicit=*/true, nullptr,
8518 nullptr);
8519 assert(CurDir.is<const OMPExecutableDirective *>() &&
8520 "Expect a executable directive");
8521 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8522 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8523 const auto *EI = C->getVarRefs().begin();
8524 for (const auto L : C->decl_component_lists(VD)) {
8525 const ValueDecl *VDecl, *Mapper;
8526 // The expression is not correct if the mapping is implicit.
8527 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8528 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8529 std::tie(VDecl, Components, Mapper) = L;
8530 assert(VDecl == VD && "We got information for the wrong declaration??");
8531 assert(!Components.empty() &&
8532 "Not expecting declaration with no component lists.");
8533 DeclComponentLists.emplace_back(Components, C->getMapType(),
8534 C->getMapTypeModifiers(),
8535 C->isImplicit(), Mapper, E);
8536 ++EI;
8537 }
8538 }
8539 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8540 const MapData &RHS) {
8541 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8542 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8543 bool HasPresent =
8544 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8545 bool HasAllocs = MapType == OMPC_MAP_alloc;
8546 MapModifiers = std::get<2>(RHS);
8547 MapType = std::get<1>(LHS);
8548 bool HasPresentR =
8549 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8550 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8551 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8554 // Find overlapping elements (including the offset from the base element).
8555 llvm::SmallDenseMap<
8556 const MapData *,
8557 llvm::SmallVector<
8558 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8559 4>
8560 OverlappedData;
8561 size_t Count = 0;
8562 for (const MapData &L : DeclComponentLists) {
8563 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8564 OpenMPMapClauseKind MapType;
8565 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8566 bool IsImplicit;
8567 const ValueDecl *Mapper;
8568 const Expr *VarRef;
8569 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8570 L;
8571 ++Count;
8572 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8573 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8574 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8575 VarRef) = L1;
8576 auto CI = Components.rbegin();
8577 auto CE = Components.rend();
8578 auto SI = Components1.rbegin();
8579 auto SE = Components1.rend();
8580 for (; CI != CE && SI != SE; ++CI, ++SI) {
8581 if (CI->getAssociatedExpression()->getStmtClass() !=
8582 SI->getAssociatedExpression()->getStmtClass())
8583 break;
8584 // Are we dealing with different variables/fields?
8585 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8586 break;
8587 }
8588 // We have found an overlap if, for at least one of the lists, we reached
8589 // the head of the components list.
8590 if (CI == CE || SI == SE) {
8591 // Ignore it if it is the same component.
8592 if (CI == CE && SI == SE)
8593 continue;
8594 const auto It = (SI == SE) ? CI : SI;
8595 // If one component is a pointer and another one is a kind of
8596 // dereference of this pointer (array subscript, section, dereference,
8597 // etc.), it is not an overlap.
8598 // The same applies if one component is a base and the other is a
8599 // dereferenced pointer MemberExpr with the same base.
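// For example (illustrative): with 'map(s.p, s.p[0:10])' the section is only
// reachable through a dereference of the mapped pointer, so the two lists
// are not considered overlapping; with 'map(s, s.x)' the member list shares
// its head with the whole-struct list and is recorded as an overlap.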
8600 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8601 (std::prev(It)->getAssociatedDeclaration() &&
8602 std::prev(It)
8603 ->getAssociatedDeclaration()
8604 ->getType()
8605 ->isPointerType()) ||
8606 (It->getAssociatedDeclaration() &&
8607 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8608 std::next(It) != CE && std::next(It) != SE))
8609 continue;
8610 const MapData &BaseData = CI == CE ? L : L1;
8611 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8612 SI == SE ? Components : Components1;
8613 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8614 OverlappedElements.getSecond().push_back(SubData);
8615 }
8616 }
8617 }
8618 // Sort the overlapped elements for each item.
8619 llvm::SmallVector<const FieldDecl *, 4> Layout;
8620 if (!OverlappedData.empty()) {
8621 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8622 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8623 while (BaseType != OrigType) {
8624 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8625 OrigType = BaseType->getPointeeOrArrayElementType();
8626 }
8628 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8629 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8630 else {
8631 const auto *RD = BaseType->getAsRecordDecl();
8632 Layout.append(RD->field_begin(), RD->field_end());
8633 }
8635 for (auto &Pair : OverlappedData) {
8636 llvm::stable_sort(
8637 Pair.getSecond(),
8638 [&Layout](
8639 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8640 OMPClauseMappableExprCommon::MappableExprComponentListRef
8641 Second) {
8642 auto CI = First.rbegin();
8643 auto CE = First.rend();
8644 auto SI = Second.rbegin();
8645 auto SE = Second.rend();
8646 for (; CI != CE && SI != SE; ++CI, ++SI) {
8647 if (CI->getAssociatedExpression()->getStmtClass() !=
8648 SI->getAssociatedExpression()->getStmtClass())
8649 break;
8650 // Are we dealing with different variables/fields?
8651 if (CI->getAssociatedDeclaration() !=
8652 SI->getAssociatedDeclaration())
8653 break;
8654 }
8656 // Lists contain the same elements.
8657 if (CI == CE && SI == SE)
8658 return false;
8660 // A list with fewer elements is less than a list with more elements.
8661 if (CI == CE || SI == SE)
8662 return CI == CE;
8664 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8665 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8666 if (FD1->getParent() == FD2->getParent())
8667 return FD1->getFieldIndex() < FD2->getFieldIndex();
8668 const auto *It =
8669 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8670 return FD == FD1 || FD == FD2;
8672 return *It == FD1;
8673 });
8674 }
8675 }
8676 // The mapping flags depend on the capture the lists are associated with.
8677 // First, go through all of the elements that have overlapped elements.
8678 bool IsFirstComponentList = true;
8679 for (const auto &Pair : OverlappedData) {
8680 const MapData &L = *Pair.getFirst();
8681 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8682 OpenMPMapClauseKind MapType;
8683 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8684 bool IsImplicit;
8685 const ValueDecl *Mapper;
8686 const Expr *VarRef;
8687 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8688 L;
8689 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8690 OverlappedComponents = Pair.getSecond();
8691 generateInfoForComponentList(
8692 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8693 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8694 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8695 IsFirstComponentList = false;
8696 }
8697 // Go through other elements without overlapped elements.
8698 for (const MapData &L : DeclComponentLists) {
8699 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8700 OpenMPMapClauseKind MapType;
8701 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8702 bool IsImplicit;
8703 const ValueDecl *Mapper;
8704 const Expr *VarRef;
8705 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8706 L;
8707 auto It = OverlappedData.find(&L);
8708 if (It == OverlappedData.end())
8709 generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
8710 Components, CombinedInfo, PartialStruct,
8711 IsFirstComponentList, IsImplicit, Mapper,
8712 /*ForDeviceAddr=*/false, VD, VarRef);
8713 IsFirstComponentList = false;
8714 }
8715 }
8717 /// Generate the default map information for a given capture \a CI,
8718 /// record field declaration \a RI and captured value \a CV.
8719 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8720 const FieldDecl &RI, llvm::Value *CV,
8721 MapCombinedInfoTy &CombinedInfo) const {
8722 bool IsImplicit = true;
8723 // Do the default mapping.
8724 if (CI.capturesThis()) {
8725 CombinedInfo.Exprs.push_back(nullptr);
8726 CombinedInfo.BasePointers.push_back(CV);
8727 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8728 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8729 CombinedInfo.Pointers.push_back(CV);
8730 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8731 CombinedInfo.Sizes.push_back(
8732 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8733 CGF.Int64Ty, /*isSigned=*/true));
8734 // Default map type.
8735 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8736 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8737 } else if (CI.capturesVariableByCopy()) {
8738 const VarDecl *VD = CI.getCapturedVar();
8739 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8740 CombinedInfo.BasePointers.push_back(CV);
8741 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8742 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8743 CombinedInfo.Pointers.push_back(CV);
8744 if (!RI.getType()->isAnyPointerType()) {
8745 // We have to signal to the runtime that captures passed by value are
8746 // not pointers.
8747 CombinedInfo.Types.push_back(
8748 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8749 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8750 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8751 } else {
8752 // Pointers are implicitly mapped with a zero size and no flags
8753 // (other than first map that is added for all implicit maps).
8754 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8755 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8756 }
8757 auto I = FirstPrivateDecls.find(VD);
8758 if (I != FirstPrivateDecls.end())
8759 IsImplicit = I->getSecond();
8760 } else {
8761 assert(CI.capturesVariable() && "Expected captured reference.");
8762 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8763 QualType ElementType = PtrTy->getPointeeType();
8764 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8765 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8766 // The default map type for a scalar/complex type is 'to' because by
8767 // default the value doesn't have to be retrieved. For an aggregate
8768 // type, the default is 'tofrom'.
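// Illustrative example: an 'int x' captured by reference is emitted as 'to'
// (retrieval is not required by default), while a 'struct S s' is emitted
// as 'tofrom'.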
8769 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8770 const VarDecl *VD = CI.getCapturedVar();
8771 auto I = FirstPrivateDecls.find(VD);
8772 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8773 CombinedInfo.BasePointers.push_back(CV);
8774 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8775 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8776 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8777 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8778 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8779 AlignmentSource::Decl));
8780 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8781 } else {
8782 CombinedInfo.Pointers.push_back(CV);
8783 }
8784 if (I != FirstPrivateDecls.end())
8785 IsImplicit = I->getSecond();
8786 }
8787 // Every default map produces a single argument which is a target parameter.
8788 CombinedInfo.Types.back() |=
8789 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8791 // Add flag stating this is an implicit map.
8792 if (IsImplicit)
8793 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8795 // No user-defined mapper for default mapping.
8796 CombinedInfo.Mappers.push_back(nullptr);
8797 }
8798 };
8799 } // anonymous namespace
8801 // Try to extract the base declaration from a `this->x` expression if possible.
8802 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8803 if (!E)
8804 return nullptr;
8806 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8807 if (const MemberExpr *ME =
8808 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8809 return ME->getMemberDecl();
8810 return nullptr;
8811 }
8813 /// Emit a string constant containing the names of the values mapped to the
8814 /// offloading runtime library.
8815 llvm::Constant *
8816 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8817 MappableExprsHandler::MappingExprInfo &MapExprs) {
8819 uint32_t SrcLocStrSize;
8820 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8821 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8823 SourceLocation Loc;
8824 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8825 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8826 Loc = VD->getLocation();
8827 else
8828 Loc = MapExprs.getMapExpr()->getExprLoc();
8829 } else {
8830 Loc = MapExprs.getMapDecl()->getLocation();
8831 }
8833 std::string ExprName;
8834 if (MapExprs.getMapExpr()) {
8835 PrintingPolicy P(CGF.getContext().getLangOpts());
8836 llvm::raw_string_ostream OS(ExprName);
8837 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8838 OS.flush();
8839 } else {
8840 ExprName = MapExprs.getMapDecl()->getNameAsString();
8841 }
8843 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8844 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8845 PLoc.getLine(), PLoc.getColumn(),
8846 SrcLocStrSize);
8847 }
8849 /// Emit the arrays used to pass the captures and map information to the
8850 /// offloading runtime library. If there is no map or capture information,
8851 /// return nullptr by reference.
8852 static void emitOffloadingArrays(
8853 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8854 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8855 bool IsNonContiguous = false) {
8856 CodeGenModule &CGM = CGF.CGM;
8858 // Reset the array information.
8859 Info.clearArrayInfo();
8860 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8862 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8863 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8864 CGF.AllocaInsertPt->getIterator());
8865 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8866 CGF.Builder.GetInsertPoint());
8868 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8869 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8870 };
8871 if (CGM.getCodeGenOpts().getDebugInfo() !=
8872 llvm::codegenoptions::NoDebugInfo) {
8873 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8874 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8875 FillInfoMap);
8876 }
8878 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8879 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8880 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8881 }
8882 };
8884 auto CustomMapperCB = [&](unsigned int I) {
8885 llvm::Value *MFunc = nullptr;
8886 if (CombinedInfo.Mappers[I]) {
8887 Info.HasMapper = true;
8888 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8889 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8890 }
8891 return MFunc;
8892 };
8893 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8894 /*IsNonContiguous=*/true, DeviceAddrCB,
8895 CustomMapperCB);
8896 }
8898 /// Check for inner distribute directive.
8899 static const OMPExecutableDirective *
8900 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8901 const auto *CS = D.getInnermostCapturedStmt();
8902 const auto *Body =
8903 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8904 const Stmt *ChildStmt =
8905 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8907 if (const auto *NestedDir =
8908 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8909 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8910 switch (D.getDirectiveKind()) {
8911 case OMPD_target:
8912 // For now, just treat 'target teams loop' as if it's distributed.
8913 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8914 return NestedDir;
8915 if (DKind == OMPD_teams) {
8916 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8917 /*IgnoreCaptured=*/true);
8918 if (!Body)
8919 return nullptr;
8920 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8921 if (const auto *NND =
8922 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8923 DKind = NND->getDirectiveKind();
8924 if (isOpenMPDistributeDirective(DKind))
8925 return NND;
8926 }
8927 }
8928 return nullptr;
8929 case OMPD_target_teams:
8930 if (isOpenMPDistributeDirective(DKind))
8931 return NestedDir;
8932 return nullptr;
8933 case OMPD_target_parallel:
8934 case OMPD_target_simd:
8935 case OMPD_target_parallel_for:
8936 case OMPD_target_parallel_for_simd:
8937 return nullptr;
8938 case OMPD_target_teams_distribute:
8939 case OMPD_target_teams_distribute_simd:
8940 case OMPD_target_teams_distribute_parallel_for:
8941 case OMPD_target_teams_distribute_parallel_for_simd:
8942 case OMPD_parallel:
8943 case OMPD_for:
8944 case OMPD_parallel_for:
8945 case OMPD_parallel_master:
8946 case OMPD_parallel_sections:
8947 case OMPD_for_simd:
8948 case OMPD_parallel_for_simd:
8949 case OMPD_cancel:
8950 case OMPD_cancellation_point:
8951 case OMPD_ordered:
8952 case OMPD_threadprivate:
8953 case OMPD_allocate:
8954 case OMPD_task:
8955 case OMPD_simd:
8956 case OMPD_tile:
8957 case OMPD_unroll:
8958 case OMPD_sections:
8959 case OMPD_section:
8960 case OMPD_single:
8961 case OMPD_master:
8962 case OMPD_critical:
8963 case OMPD_taskyield:
8964 case OMPD_barrier:
8965 case OMPD_taskwait:
8966 case OMPD_taskgroup:
8967 case OMPD_atomic:
8968 case OMPD_flush:
8969 case OMPD_depobj:
8970 case OMPD_scan:
8971 case OMPD_teams:
8972 case OMPD_target_data:
8973 case OMPD_target_exit_data:
8974 case OMPD_target_enter_data:
8975 case OMPD_distribute:
8976 case OMPD_distribute_simd:
8977 case OMPD_distribute_parallel_for:
8978 case OMPD_distribute_parallel_for_simd:
8979 case OMPD_teams_distribute:
8980 case OMPD_teams_distribute_simd:
8981 case OMPD_teams_distribute_parallel_for:
8982 case OMPD_teams_distribute_parallel_for_simd:
8983 case OMPD_target_update:
8984 case OMPD_declare_simd:
8985 case OMPD_declare_variant:
8986 case OMPD_begin_declare_variant:
8987 case OMPD_end_declare_variant:
8988 case OMPD_declare_target:
8989 case OMPD_end_declare_target:
8990 case OMPD_declare_reduction:
8991 case OMPD_declare_mapper:
8992 case OMPD_taskloop:
8993 case OMPD_taskloop_simd:
8994 case OMPD_master_taskloop:
8995 case OMPD_master_taskloop_simd:
8996 case OMPD_parallel_master_taskloop:
8997 case OMPD_parallel_master_taskloop_simd:
8998 case OMPD_requires:
8999 case OMPD_metadirective:
9000 case OMPD_unknown:
9001 default:
9002 llvm_unreachable("Unexpected directive.");
9006 return nullptr;
9007 }
9009 /// Emit the user-defined mapper function. The code generation follows the
9010 /// pattern in the example below.
9011 /// \code
9012 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9013 /// void *base, void *begin,
9014 /// int64_t size, int64_t type,
9015 /// void *name = nullptr) {
9016 /// // Allocate space for an array section first or add a base/begin for
9017 /// // pointer dereference.
9018 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9019 /// !maptype.IsDelete)
9020 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9021 /// size*sizeof(Ty), clearToFromMember(type));
9022 /// // Map members.
9023 /// for (unsigned i = 0; i < size; i++) {
9024 /// // For each component specified by this mapper:
9025 /// for (auto c : begin[i]->all_components) {
9026 /// if (c.hasMapper())
9027 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9028 /// c.arg_type, c.arg_name);
9029 /// else
9030 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9031 /// c.arg_begin, c.arg_size, c.arg_type,
9032 /// c.arg_name);
9033 /// }
9034 /// }
9035 /// // Delete the array section.
9036 /// if (size > 1 && maptype.IsDelete)
9037 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9038 /// size*sizeof(Ty), clearToFromMember(type));
9039 /// }
9040 /// \endcode
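/// A user-level declaration that reaches this codegen looks like the
/// following (an illustrative sketch, not taken from a test):
/// \code
/// struct Vec { int Len; double *Data; };
/// #pragma omp declare mapper(Vec V) map(V, V.Data[0 : V.Len])
/// \endcode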
9041 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9042 CodeGenFunction *CGF) {
9043 if (UDMMap.count(D) > 0)
9044 return;
9045 ASTContext &C = CGM.getContext();
9046 QualType Ty = D->getType();
9047 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9048 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9049 auto *MapperVarDecl =
9050 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9051 SourceLocation Loc = D->getLocation();
9052 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9053 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9055 // Prepare mapper function arguments and attributes.
9056 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9057 C.VoidPtrTy, ImplicitParamDecl::Other);
9058 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9059 ImplicitParamDecl::Other);
9060 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9061 C.VoidPtrTy, ImplicitParamDecl::Other);
9062 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9063 ImplicitParamDecl::Other);
9064 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9065 ImplicitParamDecl::Other);
9066 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9067 ImplicitParamDecl::Other);
9068 FunctionArgList Args;
9069 Args.push_back(&HandleArg);
9070 Args.push_back(&BaseArg);
9071 Args.push_back(&BeginArg);
9072 Args.push_back(&SizeArg);
9073 Args.push_back(&TypeArg);
9074 Args.push_back(&NameArg);
9075 const CGFunctionInfo &FnInfo =
9076 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9077 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9078 SmallString<64> TyStr;
9079 llvm::raw_svector_ostream Out(TyStr);
9080 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9081 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9082 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9083 Name, &CGM.getModule());
9084 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9085 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9086 // Start the mapper function code generation.
9087 CodeGenFunction MapperCGF(CGM);
9088 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9089 // Compute the starting and end addresses of array elements.
9090 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9091 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9092 C.getPointerType(Int64Ty), Loc);
9093 // Prepare common arguments for array initialization and deletion.
9094 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9095 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9096 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9097 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9098 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9099 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9100 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9101 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9102 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9103 // Convert the size in bytes into the number of array elements.
9104 Size = MapperCGF.Builder.CreateExactUDiv(
9105 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9106 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9107 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9108 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9109 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9110 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9111 C.getPointerType(Int64Ty), Loc);
9112 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9113 MapperCGF.GetAddrOfLocalVar(&NameArg),
9114 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9116 // Emit array initialization if this is an array section and \p MapType
9117 // indicates that memory allocation is required.
9118 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9119 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9120 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9122 // Emit a for loop that iterates through SizeArg elements and maps all of them.
9124 // Emit the loop header block.
9125 MapperCGF.EmitBlock(HeadBB);
9126 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9127 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9128 // Evaluate whether the initial condition is satisfied.
9129 llvm::Value *IsEmpty =
9130 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9131 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9132 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9134 // Emit the loop body block.
9135 MapperCGF.EmitBlock(BodyBB);
9136 llvm::BasicBlock *LastBB = BodyBB;
9137 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9138 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9139 PtrPHI->addIncoming(PtrBegin, EntryBB);
9140 Address PtrCurrent(PtrPHI, ElemTy,
9141 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9142 .getAlignment()
9143 .alignmentOfArrayElement(ElementSize));
9144 // Privatize the mapper's declared variable so that it refers to the current array element.
9145 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9146 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9147 (void)Scope.Privatize();
9149 // Get map clause information. Fill up the arrays with all mapped variables.
9150 MappableExprsHandler::MapCombinedInfoTy Info;
9151 MappableExprsHandler MEHandler(*D, MapperCGF);
9152 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9154 // Call the runtime API __tgt_mapper_num_components to get the number of
9155 // pre-existing components.
9156 llvm::Value *OffloadingArgs[] = {Handle};
9157 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9158 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9159 OMPRTL___tgt_mapper_num_components),
9160 OffloadingArgs);
9161 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9162 PreviousSize,
9163 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
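// Sketch of the intent (assuming the MEMBER_OF field occupies the high bits
// of the 64-bit map type, as encoded via getFlagMemberOffset): shifting the
// pre-existing component count into the MEMBER_OF field lets the NUWAdd
// below rebase this mapper's MEMBER_OF indices on top of the components
// already pushed by the enclosing construct.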
9165 // Fill up the runtime mapper handle for all components.
9166 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9167 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9168 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9169 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9170 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9171 llvm::Value *CurSizeArg = Info.Sizes[I];
9172 llvm::Value *CurNameArg =
9173 (CGM.getCodeGenOpts().getDebugInfo() ==
9174 llvm::codegenoptions::NoDebugInfo)
9175 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9176 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9178 // Extract the MEMBER_OF field from the map type.
9179 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9180 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9181 Info.Types[I]));
9182 llvm::Value *MemberMapType =
9183 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9185 // Combine the map type inherited from user-defined mapper with that
9186 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9187 // bits of the \a MapType, which is the input argument of the mapper
9188 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9189 // bits of MemberMapType.
9190 // [OpenMP 5.0], 1.2.6. map-type decay.
9191 //        | alloc |  to   | from  | tofrom | release | delete
9192 // -------+-------+-------+-------+--------+---------+-------
9193 // alloc  | alloc | alloc | alloc | alloc  | release | delete
9194 // to     | alloc |  to   | alloc |   to   | release | delete
9195 // from   | alloc | alloc | from  |  from  | release | delete
9196 // tofrom | alloc |  to   | from  | tofrom | release | delete
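// For example (illustrative): if the program maps a variable 'tofrom' but
// the mapper declares a member 'to', the 'to' row and 'tofrom' column of
// the table yield 'to', which is what the branches below compute bit-wise
// at run time.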
9197 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9198 MapType,
9199 MapperCGF.Builder.getInt64(
9200 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9201 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9202 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9203 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9204 llvm::BasicBlock *AllocElseBB =
9205 MapperCGF.createBasicBlock("omp.type.alloc.else");
9206 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9207 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9208 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9209 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9210 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9211 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9212 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9213 MapperCGF.EmitBlock(AllocBB);
9214 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9215 MemberMapType,
9216 MapperCGF.Builder.getInt64(
9217 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9218 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9219 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9220 MapperCGF.Builder.CreateBr(EndBB);
9221 MapperCGF.EmitBlock(AllocElseBB);
9222 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9223 LeftToFrom,
9224 MapperCGF.Builder.getInt64(
9225 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9226 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9227 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9228 // In case of to, clear OMP_MAP_FROM.
9229 MapperCGF.EmitBlock(ToBB);
9230 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9231 MemberMapType,
9232 MapperCGF.Builder.getInt64(
9233 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9234 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9235 MapperCGF.Builder.CreateBr(EndBB);
9236 MapperCGF.EmitBlock(ToElseBB);
9237 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9238 LeftToFrom,
9239 MapperCGF.Builder.getInt64(
9240 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9241 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9242 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9243 // In case of from, clear OMP_MAP_TO.
9244 MapperCGF.EmitBlock(FromBB);
9245 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9246 MemberMapType,
9247 MapperCGF.Builder.getInt64(
9248 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9249 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9250 // In case of tofrom, do nothing.
9251 MapperCGF.EmitBlock(EndBB);
9252 LastBB = EndBB;
9253 llvm::PHINode *CurMapType =
9254 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9255 CurMapType->addIncoming(AllocMapType, AllocBB);
9256 CurMapType->addIncoming(ToMapType, ToBB);
9257 CurMapType->addIncoming(FromMapType, FromBB);
9258 CurMapType->addIncoming(MemberMapType, ToElseBB);
9260 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9261 CurSizeArg, CurMapType, CurNameArg};
9262 if (Info.Mappers[I]) {
9263 // Call the corresponding mapper function.
9264 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9265 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9266 assert(MapperFunc && "Expect a valid mapper function is available.");
9267 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9268 } else {
9269 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9270 // data structure.
9271 MapperCGF.EmitRuntimeCall(
9272 OMPBuilder.getOrCreateRuntimeFunction(
9273 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9274 OffloadingArgs);
9275 }
9278 // Update the pointer to point to the next element that needs to be mapped,
9279 // and check whether we have mapped all elements.
9280 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9281 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9282 PtrPHI->addIncoming(PtrNext, LastBB);
9283 llvm::Value *IsDone =
9284 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9285 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9286 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9288 MapperCGF.EmitBlock(ExitBB);
9289 // Emit array deletion if this is an array section and \p MapType indicates
9290 // that deletion is required.
9291 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9292 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9294 // Emit the function exit block.
9295 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9296 MapperCGF.FinishFunction();
9297 UDMMap.try_emplace(D, Fn);
9298 if (CGF) {
9299 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9300 Decls.second.push_back(D);
9301 }
9302 }
9304 /// Emit the array initialization or deletion portion for user-defined mapper
9305 /// code generation. First, it evaluates whether an array section is mapped and
9306 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9307 /// true, and \a MapType indicates to not delete this array, array
9308 /// initialization code is generated. If \a IsInit is false, and \a MapType
9309 /// indicates to delete this array, array deletion code is generated.
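/// For instance (an illustrative sketch): a 'map(delete: a[0:n])' processed
/// through a mapper reaches this helper with \a IsInit false and the
/// OMP_MAP_DELETE bit set, so a single component covering the whole section
/// is pushed for deletion.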
9310 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9311 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9312 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9313 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9314 bool IsInit) {
9315 StringRef Prefix = IsInit ? ".init" : ".del";
9317 // Evaluate if this is an array section.
9318 llvm::BasicBlock *BodyBB =
9319 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9320 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9321 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9322 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9323 MapType,
9324 MapperCGF.Builder.getInt64(
9325 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9326 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9327 llvm::Value *DeleteCond;
9328 llvm::Value *Cond;
9329 if (IsInit) {
9330 // base != begin?
9331 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9332 // IsPtrAndObj?
9333 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9334 MapType,
9335 MapperCGF.Builder.getInt64(
9336 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9337 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9338 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9339 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9340 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9341 DeleteCond = MapperCGF.Builder.CreateIsNull(
9342 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9343 } else {
9344 Cond = IsArray;
9345 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9346 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9348 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9349 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9351 MapperCGF.EmitBlock(BodyBB);
9352 // Get the array size by multiplying element size and element number (i.e., \p
9353 // Size).
9354 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9355 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9356 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it serves
9357 // memory allocation/deletion purposes only.
9358 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9359 MapType,
9360 MapperCGF.Builder.getInt64(
9361 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9362 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9363 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9364 MapTypeArg = MapperCGF.Builder.CreateOr(
9365 MapTypeArg,
9366 MapperCGF.Builder.getInt64(
9367 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9368 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9370 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9371 // data structure.
9372 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9373 ArraySize, MapTypeArg, MapName};
9374 MapperCGF.EmitRuntimeCall(
9375 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9376 OMPRTL___tgt_push_mapper_component),
9377 OffloadingArgs);
9378 }
9380 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9381 const OMPDeclareMapperDecl *D) {
9382 auto I = UDMMap.find(D);
9383 if (I != UDMMap.end())
9384 return I->second;
9385 emitUserDefinedMapper(D);
9386 return UDMMap.lookup(D);
9387 }
9389 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9390 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9391 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9392 const OMPLoopDirective &D)>
9393 SizeEmitter) {
9394 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9395 const OMPExecutableDirective *TD = &D;
9396 // Get nested teams distribute kind directive, if any.
9397 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9398 Kind != OMPD_target_teams_loop)
9399 TD = getNestedDistributeDirective(CGM.getContext(), D);
9400 if (!TD)
9401 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9403 const auto *LD = cast<OMPLoopDirective>(TD);
9404 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9405 return NumIterations;
9406 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9407 }
9409 static void
9410 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9411 const OMPExecutableDirective &D,
9412 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9413 bool RequiresOuterTask, const CapturedStmt &CS,
9414 bool OffloadingMandatory, CodeGenFunction &CGF) {
9415 if (OffloadingMandatory) {
9416 CGF.Builder.CreateUnreachable();
9417 } else {
9418 if (RequiresOuterTask) {
9419 CapturedVars.clear();
9420 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9422 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9423 CapturedVars);
9424 }
9425 }
9427 static llvm::Value *emitDeviceID(
9428 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9429 CodeGenFunction &CGF) {
9430 // Emit device ID if any.
9431 llvm::Value *DeviceID;
9432 if (Device.getPointer()) {
9433 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9434 Device.getInt() == OMPC_DEVICE_device_num) &&
9435 "Expected device_num modifier.");
9436 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9437 DeviceID =
9438 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9439 } else {
9440 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9441 }
9442 return DeviceID;
9443 }
9445 llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9446 CodeGenFunction &CGF) {
9447 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9449 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9450 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9451 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9452 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9453 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9454 /*isSigned=*/false);
9455 }
9456 return DynCGroupMem;
9457 }
9459 static void emitTargetCallKernelLaunch(
9460 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9461 const OMPExecutableDirective &D,
9462 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9463 const CapturedStmt &CS, bool OffloadingMandatory,
9464 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9465 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9466 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9467 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9468 const OMPLoopDirective &D)>
9469 SizeEmitter,
9470 CodeGenFunction &CGF, CodeGenModule &CGM) {
9471 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9473 // Fill up the arrays with all the captured variables.
9474 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9476 // Get mappable expression information.
9477 MappableExprsHandler MEHandler(D, CGF);
9478 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9479 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9481 auto RI = CS.getCapturedRecordDecl()->field_begin();
9482 auto *CV = CapturedVars.begin();
9483 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9484 CE = CS.capture_end();
9485 CI != CE; ++CI, ++RI, ++CV) {
9486 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9487 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9489 // VLA sizes are passed to the outlined region by copy and do not have map
9490 // information associated.
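// E.g. (illustrative): in 'void foo(int n) { int a[n]; ... }' where 'a' is
// used in a target region, the VLA bound 'n' reaches this loop as a plain
// value capture with no associated map clause.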
9491 if (CI->capturesVariableArrayType()) {
9492 CurInfo.Exprs.push_back(nullptr);
9493 CurInfo.BasePointers.push_back(*CV);
9494 CurInfo.DevicePtrDecls.push_back(nullptr);
9495 CurInfo.DevicePointers.push_back(
9496 MappableExprsHandler::DeviceInfoTy::None);
9497 CurInfo.Pointers.push_back(*CV);
9498 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9499 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9500 // Copy to the device as an argument. No need to retrieve it.
9501 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9502 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9503 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9504 CurInfo.Mappers.push_back(nullptr);
9505 } else {
9506 // If we have any information in the map clause, we use it, otherwise we
9507 // just do a default mapping.
9508 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9509 if (!CI->capturesThis())
9510 MappedVarSet.insert(CI->getCapturedVar());
9511 else
9512 MappedVarSet.insert(nullptr);
9513 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9514 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9515 // Generate correct mapping for variables captured by reference in
9516 // lambdas.
9517 if (CI->capturesVariable())
9518 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9519 CurInfo, LambdaPointers);
9520 }
9521 // We expect to have at least an element of information for this capture.
9522 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9523 "Non-existing map pointer for capture!");
9524 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9525 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9526 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9527 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9528 "Inconsistent map information sizes!");
9530 // If there is an entry in PartialStruct it means we have a struct with
9531 // individual members mapped. Emit an extra combined entry.
9532 if (PartialStruct.Base.isValid()) {
9533 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9534 MEHandler.emitCombinedEntry(
9535 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9536 OMPBuilder, nullptr,
9537 !PartialStruct.PreliminaryMapData.BasePointers.empty());
9538 }
9540 // We need to append the results of this capture to what we already have.
9541 CombinedInfo.append(CurInfo);
9542 }
9543 // Adjust MEMBER_OF flags for the lambdas captures.
9544 MEHandler.adjustMemberOfForLambdaCaptures(
9545 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9546 CombinedInfo.Pointers, CombinedInfo.Types);
9547 // Map any list items in a map clause that were not captures because they
9548 // weren't referenced within the construct.
9549 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9551 CGOpenMPRuntime::TargetDataInfo Info;
9552 // Fill up the arrays and create the arguments.
9553 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9554 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9555 llvm::codegenoptions::NoDebugInfo;
9556 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9557 EmitDebug,
9558 /*ForEndCall=*/false);
9560 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9561 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9562 CGF.VoidPtrTy, CGM.getPointerAlign());
9563 InputInfo.PointersArray =
9564 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9565 InputInfo.SizesArray =
9566 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9567 InputInfo.MappersArray =
9568 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9569 MapTypesArray = Info.RTArgs.MapTypesArray;
9570 MapNamesArray = Info.RTArgs.MapNamesArray;
9572 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9573 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9574 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9575 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9576 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9578 if (IsReverseOffloading) {
9579 // Reverse offloading is not supported, so just execute on the host.
9580 // FIXME: This fallback solution is incorrect since it ignores the
9581 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9582 // assert here and ensure SEMA emits an error.
9583 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9584 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9585 return;
9586 }
9588 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9589 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9591 llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
9592 llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
9593 llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
9594 llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();
9596 auto &&EmitTargetCallFallbackCB =
9597 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9598 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9599 -> llvm::OpenMPIRBuilder::InsertPointTy {
9600 CGF.Builder.restoreIP(IP);
9601 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9602 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9603 return CGF.Builder.saveIP();
9604 };
9606 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9607 llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9608 llvm::Value *NumThreads =
9609 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9610 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9611 llvm::Value *NumIterations =
9612 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9613 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9614 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9615 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9617 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9618 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9619 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9621 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9622 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9623 DynCGGroupMem, HasNoWait);
9625 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9626 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9627 DeviceID, RTLoc, AllocaIP));
9628 };
9630 if (RequiresOuterTask)
9631 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9632 else
9633 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9634 }
9636 static void
9637 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9638 const OMPExecutableDirective &D,
9639 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9640 bool RequiresOuterTask, const CapturedStmt &CS,
9641 bool OffloadingMandatory, CodeGenFunction &CGF) {
9643 // Notify that the host version must be executed.
9644 auto &&ElseGen =
9645 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9646 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9647 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9648 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9649 };
9651 if (RequiresOuterTask) {
9652 CodeGenFunction::OMPTargetDataInfo InputInfo;
9653 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9654 } else {
9655 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9656 }
9657 }
9659 void CGOpenMPRuntime::emitTargetCall(
9660 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9661 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9662 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9663 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9664 const OMPLoopDirective &D)>
9665 SizeEmitter) {
9666 if (!CGF.HaveInsertPoint())
9667 return;
9669 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9670 CGM.getLangOpts().OpenMPOffloadMandatory;
9672 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9674 const bool RequiresOuterTask =
9675 D.hasClausesOfKind<OMPDependClause>() ||
9676 D.hasClausesOfKind<OMPNowaitClause>() ||
9677 D.hasClausesOfKind<OMPInReductionClause>() ||
9678 (CGM.getLangOpts().OpenMP >= 51 &&
9679 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9680 D.hasClausesOfKind<OMPThreadLimitClause>());
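// E.g. (illustrative): '#pragma omp target nowait depend(in : x)' must be
// wrapped in an outer task so that the depend/nowait semantics are honored.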
9681 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9682 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9683 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9684 PrePostActionTy &) {
9685 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9686 };
9687 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9689 CodeGenFunction::OMPTargetDataInfo InputInfo;
9690 llvm::Value *MapTypesArray = nullptr;
9691 llvm::Value *MapNamesArray = nullptr;
9693 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9694 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9695 OutlinedFnID, &InputInfo, &MapTypesArray,
9696 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9697 PrePostActionTy &) {
9698 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9699 RequiresOuterTask, CS, OffloadingMandatory,
9700 Device, OutlinedFnID, InputInfo, MapTypesArray,
9701 MapNamesArray, SizeEmitter, CGF, CGM);
9702 };
9704 auto &&TargetElseGen =
9705 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9706 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9707 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9708 CS, OffloadingMandatory, CGF);
9709 };
9711   // If we have a target function ID, it means that we need to support
9712   // offloading; otherwise, just execute on the host. We need to execute on the
9713   // host regardless of the condition in the if clause if, e.g., the user does
9714   // not specify any target triples.
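  // E.g. for '#pragma omp target if(cond)', both versions are emitted and
  // 'cond' selects at run time between the kernel launch (TargetThenGen)
  // and the host fallback (TargetElseGen).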
9715 if (OutlinedFnID) {
9716 if (IfCond) {
9717 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9718 } else {
9719 RegionCodeGenTy ThenRCG(TargetThenGen);
9720 ThenRCG(CGF);
9722 } else {
9723 RegionCodeGenTy ElseRCG(TargetElseGen);
9724 ElseRCG(CGF);
9728 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9729 StringRef ParentName) {
9730 if (!S)
9731 return;
9733 // Codegen OMP target directives that offload compute to the device.
9734 bool RequiresDeviceCodegen =
9735 isa<OMPExecutableDirective>(S) &&
9736 isOpenMPTargetExecutionDirective(
9737 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9739 if (RequiresDeviceCodegen) {
9740 const auto &E = *cast<OMPExecutableDirective>(S);
9742 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9743 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9745 // Is this a target region that should not be emitted as an entry point? If
9746   // so, just signal that we are done with this target region.
9747 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9748 return;
9750 switch (E.getDirectiveKind()) {
9751 case OMPD_target:
9752 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9753 cast<OMPTargetDirective>(E));
9754 break;
9755 case OMPD_target_parallel:
9756 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9757 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9758 break;
9759 case OMPD_target_teams:
9760 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9761 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9762 break;
9763 case OMPD_target_teams_distribute:
9764 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9765 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9766 break;
9767 case OMPD_target_teams_distribute_simd:
9768 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9769 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9770 break;
9771 case OMPD_target_parallel_for:
9772 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9773 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9774 break;
9775 case OMPD_target_parallel_for_simd:
9776 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9777 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9778 break;
9779 case OMPD_target_simd:
9780 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9781 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9782 break;
9783 case OMPD_target_teams_distribute_parallel_for:
9784 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9785 CGM, ParentName,
9786 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9787 break;
9788 case OMPD_target_teams_distribute_parallel_for_simd:
9789 CodeGenFunction::
9790 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9791 CGM, ParentName,
9792 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9793 break;
9794 case OMPD_target_teams_loop:
9795 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9796 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9797 break;
9798 case OMPD_target_parallel_loop:
9799 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9800 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9801 break;
9802 case OMPD_parallel:
9803 case OMPD_for:
9804 case OMPD_parallel_for:
9805 case OMPD_parallel_master:
9806 case OMPD_parallel_sections:
9807 case OMPD_for_simd:
9808 case OMPD_parallel_for_simd:
9809 case OMPD_cancel:
9810 case OMPD_cancellation_point:
9811 case OMPD_ordered:
9812 case OMPD_threadprivate:
9813 case OMPD_allocate:
9814 case OMPD_task:
9815 case OMPD_simd:
9816 case OMPD_tile:
9817 case OMPD_unroll:
9818 case OMPD_sections:
9819 case OMPD_section:
9820 case OMPD_single:
9821 case OMPD_master:
9822 case OMPD_critical:
9823 case OMPD_taskyield:
9824 case OMPD_barrier:
9825 case OMPD_taskwait:
9826 case OMPD_taskgroup:
9827 case OMPD_atomic:
9828 case OMPD_flush:
9829 case OMPD_depobj:
9830 case OMPD_scan:
9831 case OMPD_teams:
9832 case OMPD_target_data:
9833 case OMPD_target_exit_data:
9834 case OMPD_target_enter_data:
9835 case OMPD_distribute:
9836 case OMPD_distribute_simd:
9837 case OMPD_distribute_parallel_for:
9838 case OMPD_distribute_parallel_for_simd:
9839 case OMPD_teams_distribute:
9840 case OMPD_teams_distribute_simd:
9841 case OMPD_teams_distribute_parallel_for:
9842 case OMPD_teams_distribute_parallel_for_simd:
9843 case OMPD_target_update:
9844 case OMPD_declare_simd:
9845 case OMPD_declare_variant:
9846 case OMPD_begin_declare_variant:
9847 case OMPD_end_declare_variant:
9848 case OMPD_declare_target:
9849 case OMPD_end_declare_target:
9850 case OMPD_declare_reduction:
9851 case OMPD_declare_mapper:
9852 case OMPD_taskloop:
9853 case OMPD_taskloop_simd:
9854 case OMPD_master_taskloop:
9855 case OMPD_master_taskloop_simd:
9856 case OMPD_parallel_master_taskloop:
9857 case OMPD_parallel_master_taskloop_simd:
9858 case OMPD_requires:
9859 case OMPD_metadirective:
9860 case OMPD_unknown:
9861 default:
9862 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9864 return;
9867 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9868 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9869 return;
9871 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9872 return;
9875 // If this is a lambda function, look into its body.
9876 if (const auto *L = dyn_cast<LambdaExpr>(S))
9877 S = L->getBody();
9879 // Keep looking for target regions recursively.
9880 for (const Stmt *II : S->children())
9881 scanForTargetRegionsFunctions(II, ParentName);
9884 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9885 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9886 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9887 if (!DevTy)
9888 return false;
9889 // Do not emit device_type(nohost) functions for the host.
9890 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9891 return true;
9892 // Do not emit device_type(host) functions for the device.
9893 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9894 return true;
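  // E.g. a function in a '#pragma omp declare target device_type(nohost)'
  // region is therefore emitted only for the device, never for the host.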
9895 return false;
9898 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9899   // If emitting code for the host, we do not process FD here. Instead, we do
9900 // the normal code generation.
9901 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9902 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9903 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9904 CGM.getLangOpts().OpenMPIsTargetDevice))
9905 return true;
9906 return false;
9909 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9910 // Try to detect target regions in the function.
9911 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9912 StringRef Name = CGM.getMangledName(GD);
9913 scanForTargetRegionsFunctions(FD->getBody(), Name);
9914 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9915 CGM.getLangOpts().OpenMPIsTargetDevice))
9916 return true;
9919   // Do not emit the function if it is not marked as declare target.
9920 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9921 AlreadyEmittedTargetDecls.count(VD) == 0;
9924 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9925 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9926 CGM.getLangOpts().OpenMPIsTargetDevice))
9927 return true;
9929 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9930 return false;
9932 // Check if there are Ctors/Dtors in this declaration and look for target
9933 // regions in it. We use the complete variant to produce the kernel name
9934 // mangling.
9935 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9936 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9937 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9938 StringRef ParentName =
9939 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9940 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9942 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9943 StringRef ParentName =
9944 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9945 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9949   // Do not emit the variable if it is not marked as declare target.
9950 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9951 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9952 cast<VarDecl>(GD.getDecl()));
9953 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9954 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9955 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9956 HasRequiresUnifiedSharedMemory)) {
9957 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9958 return true;
9960 return false;
9963 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9964 llvm::Constant *Addr) {
9965 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9966 !CGM.getLangOpts().OpenMPIsTargetDevice)
9967 return;
9969 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9970 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9972   // If this is an 'extern' declaration, we defer to the canonical definition and
9973 // do not emit an offloading entry.
9974 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9975 VD->hasExternalStorage())
9976 return;
9978 if (!Res) {
9979 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9980 // Register non-target variables being emitted in device code (debug info
9981 // may cause this).
9982 StringRef VarName = CGM.getMangledName(VD);
9983 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9985 return;
9988 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9989 auto LinkageForVariable = [&VD, this]() {
9990 return CGM.getLLVMLinkageVarDefinition(VD);
9993 std::vector<llvm::GlobalVariable *> GeneratedRefs;
9994 OMPBuilder.registerTargetGlobalVariable(
9995 convertCaptureClause(VD), convertDeviceClause(VD),
9996 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9997 VD->isExternallyVisible(),
9998 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9999 VD->getCanonicalDecl()->getBeginLoc()),
10000 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10001 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10002 CGM.getTypes().ConvertTypeForMem(
10003 CGM.getContext().getPointerType(VD->getType())),
10004 Addr);
10006 for (auto *ref : GeneratedRefs)
10007 CGM.addCompilerUsedGlobal(ref);
10010 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10011 if (isa<FunctionDecl>(GD.getDecl()) ||
10012 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10013 return emitTargetFunctions(GD);
10015 return emitTargetGlobalVariable(GD);
10018 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10019 for (const VarDecl *VD : DeferredGlobalVariables) {
10020 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10021 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10022 if (!Res)
10023 continue;
10024 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10025 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10026 !HasRequiresUnifiedSharedMemory) {
10027 CGM.EmitGlobal(VD);
10028 } else {
10029 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10030 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10031 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10032 HasRequiresUnifiedSharedMemory)) &&
10033 "Expected link clause or to clause with unified memory.");
10034 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10039 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10040 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10041 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10042 " Expected target-based directive.");
10045 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10046 for (const OMPClause *Clause : D->clauselists()) {
10047 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10048 HasRequiresUnifiedSharedMemory = true;
10049 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10050 } else if (const auto *AC =
10051 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10052 switch (AC->getAtomicDefaultMemOrderKind()) {
10053 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10054 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10055 break;
10056 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10057 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10058 break;
10059 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10060 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10061 break;
10062 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10063 break;
10069 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10070 return RequiresAtomicOrdering;
10073 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10074 LangAS &AS) {
10075 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10076 return false;
10077 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10078   switch (A->getAllocatorType()) {
10079 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10080 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10081 // Not supported, fallback to the default mem space.
10082 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10083 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10084 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10085 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10086 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10087 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10088 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10089 AS = LangAS::Default;
10090 return true;
10091 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10092 llvm_unreachable("Expected predefined allocator for the variables with the "
10093 "static storage.");
10095 return false;
10098 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10099 return HasRequiresUnifiedSharedMemory;
10102 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10103 CodeGenModule &CGM)
10104 : CGM(CGM) {
10105 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10106 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10107 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10111 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10112 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10113 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10116 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10117 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10118 return true;
10120 const auto *D = cast<FunctionDecl>(GD.getDecl());
10121   // Do not emit the function if it is marked as declare target, as it was
10122   // already emitted.
10123 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10124 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10125 if (auto *F = dyn_cast_or_null<llvm::Function>(
10126 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10127 return !F->isDeclaration();
10128 return false;
10130 return true;
10133 return !AlreadyEmittedTargetDecls.insert(D).second;
10136 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10137 // If we don't have entries or if we are emitting code for the device, we
10138 // don't need to do anything.
10139 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10140 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
10141 (OMPBuilder.OffloadInfoManager.empty() &&
10142 !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
10143 return nullptr;
10145 // Create and register the function that handles the requires directives.
10146 ASTContext &C = CGM.getContext();
10148 llvm::Function *RequiresRegFn;
10150 CodeGenFunction CGF(CGM);
10151 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10152 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10153 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10154 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10155 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10156 // TODO: check for other requires clauses.
10157 // The requires directive takes effect only when a target region is
10158   // present in the compilation unit. Otherwise, it is ignored and not
10159   // passed to the runtime. This prevents the runtime from raising an error
10160   // for mismatching requires clauses across compilation units that don't
10161   // contain at least one target region.
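  // E.g. a TU containing only '#pragma omp requires unified_shared_memory'
  // and no target regions emits no registration function at all.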
10162 assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
10163 !OMPBuilder.OffloadInfoManager.empty()) &&
10164 "Target or declare target region expected.");
10165 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10166 CGM.getModule(), OMPRTL___tgt_register_requires),
10167 llvm::ConstantInt::get(
10168 CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
10169 CGF.FinishFunction();
10171 return RequiresRegFn;
10174 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10175 const OMPExecutableDirective &D,
10176 SourceLocation Loc,
10177 llvm::Function *OutlinedFn,
10178 ArrayRef<llvm::Value *> CapturedVars) {
10179 if (!CGF.HaveInsertPoint())
10180 return;
10182 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10183 CodeGenFunction::RunCleanupsScope Scope(CGF);
10185 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10186 llvm::Value *Args[] = {
10187 RTLoc,
10188 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10189 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10190 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10191 RealArgs.append(std::begin(Args), std::end(Args));
10192 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10194 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10195 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10196 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10199 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10200 const Expr *NumTeams,
10201 const Expr *ThreadLimit,
10202 SourceLocation Loc) {
10203 if (!CGF.HaveInsertPoint())
10204 return;
10206 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10208 llvm::Value *NumTeamsVal =
10209 NumTeams
10210 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10211 CGF.CGM.Int32Ty, /* isSigned = */ true)
10212 : CGF.Builder.getInt32(0);
10214 llvm::Value *ThreadLimitVal =
10215 ThreadLimit
10216 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10217 CGF.CGM.Int32Ty, /* isSigned = */ true)
10218 : CGF.Builder.getInt32(0);
10220   // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10221 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10222 ThreadLimitVal};
10223 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10224 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10225 PushNumTeamsArgs);
10228 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10229 const Expr *ThreadLimit,
10230 SourceLocation Loc) {
10231 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10232 llvm::Value *ThreadLimitVal =
10233 ThreadLimit
10234 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10235 CGF.CGM.Int32Ty, /* isSigned = */ true)
10236 : CGF.Builder.getInt32(0);
10238 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10239 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10240 ThreadLimitVal};
10241 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10242 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10243 ThreadLimitArgs);
10246 void CGOpenMPRuntime::emitTargetDataCalls(
10247 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10248 const Expr *Device, const RegionCodeGenTy &CodeGen,
10249 CGOpenMPRuntime::TargetDataInfo &Info) {
10250 if (!CGF.HaveInsertPoint())
10251 return;
10253 // Action used to replace the default codegen action and turn privatization
10254 // off.
10255 PrePostActionTy NoPrivAction;
10257 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10259 llvm::Value *IfCondVal = nullptr;
10260 if (IfCond)
10261 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10263 // Emit device ID if any.
10264 llvm::Value *DeviceID = nullptr;
10265 if (Device) {
10266 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10267 CGF.Int64Ty, /*isSigned=*/true);
10268 } else {
10269 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10272 // Fill up the arrays with all the mapped variables.
10273 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10274 auto GenMapInfoCB =
10275 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10276 CGF.Builder.restoreIP(CodeGenIP);
10277 // Get map clause information.
10278 MappableExprsHandler MEHandler(D, CGF);
10279 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10281 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10282 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10284 if (CGM.getCodeGenOpts().getDebugInfo() !=
10285 llvm::codegenoptions::NoDebugInfo) {
10286 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10287 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10288 FillInfoMap);
10291 return CombinedInfo;
10293 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10294 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10295 CGF.Builder.restoreIP(CodeGenIP);
10296 switch (BodyGenType) {
10297 case BodyGenTy::Priv:
10298 if (!Info.CaptureDeviceAddrMap.empty())
10299 CodeGen(CGF);
10300 break;
10301 case BodyGenTy::DupNoPriv:
10302 if (!Info.CaptureDeviceAddrMap.empty()) {
10303 CodeGen.setAction(NoPrivAction);
10304 CodeGen(CGF);
10306 break;
10307 case BodyGenTy::NoPriv:
10308 if (Info.CaptureDeviceAddrMap.empty()) {
10309 CodeGen.setAction(NoPrivAction);
10310 CodeGen(CGF);
10312 break;
10314 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10315 CGF.Builder.GetInsertPoint());
10318 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10319 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10320 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10324 auto CustomMapperCB = [&](unsigned int I) {
10325 llvm::Value *MFunc = nullptr;
10326 if (CombinedInfo.Mappers[I]) {
10327 Info.HasMapper = true;
10328 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10329 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10331 return MFunc;
10334 // Source location for the ident struct
10335 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10337 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10338 CGF.AllocaInsertPt->getIterator());
10339 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10340 CGF.Builder.GetInsertPoint());
10341 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10342 CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10343 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10344 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10347 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10348 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10349 const Expr *Device) {
10350 if (!CGF.HaveInsertPoint())
10351 return;
10353 assert((isa<OMPTargetEnterDataDirective>(D) ||
10354 isa<OMPTargetExitDataDirective>(D) ||
10355 isa<OMPTargetUpdateDirective>(D)) &&
10356 "Expecting either target enter, exit data, or update directives.");
10358 CodeGenFunction::OMPTargetDataInfo InputInfo;
10359 llvm::Value *MapTypesArray = nullptr;
10360 llvm::Value *MapNamesArray = nullptr;
10361 // Generate the code for the opening of the data environment.
10362 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10363 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10364 // Emit device ID if any.
10365 llvm::Value *DeviceID = nullptr;
10366 if (Device) {
10367 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10368 CGF.Int64Ty, /*isSigned=*/true);
10369 } else {
10370 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10373 // Emit the number of elements in the offloading arrays.
10374 llvm::Constant *PointerNum =
10375 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10377 // Source location for the ident struct
10378 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10380 llvm::Value *OffloadingArgs[] = {RTLoc,
10381 DeviceID,
10382 PointerNum,
10383 InputInfo.BasePointersArray.getPointer(),
10384 InputInfo.PointersArray.getPointer(),
10385 InputInfo.SizesArray.getPointer(),
10386 MapTypesArray,
10387 MapNamesArray,
10388 InputInfo.MappersArray.getPointer()};
10390 // Select the right runtime function call for each standalone
10391 // directive.
10392 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10393 RuntimeFunction RTLFn;
10394 switch (D.getDirectiveKind()) {
10395 case OMPD_target_enter_data:
10396 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10397 : OMPRTL___tgt_target_data_begin_mapper;
10398 break;
10399 case OMPD_target_exit_data:
10400 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10401 : OMPRTL___tgt_target_data_end_mapper;
10402 break;
10403 case OMPD_target_update:
10404 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10405 : OMPRTL___tgt_target_data_update_mapper;
10406 break;
10407 case OMPD_parallel:
10408 case OMPD_for:
10409 case OMPD_parallel_for:
10410 case OMPD_parallel_master:
10411 case OMPD_parallel_sections:
10412 case OMPD_for_simd:
10413 case OMPD_parallel_for_simd:
10414 case OMPD_cancel:
10415 case OMPD_cancellation_point:
10416 case OMPD_ordered:
10417 case OMPD_threadprivate:
10418 case OMPD_allocate:
10419 case OMPD_task:
10420 case OMPD_simd:
10421 case OMPD_tile:
10422 case OMPD_unroll:
10423 case OMPD_sections:
10424 case OMPD_section:
10425 case OMPD_single:
10426 case OMPD_master:
10427 case OMPD_critical:
10428 case OMPD_taskyield:
10429 case OMPD_barrier:
10430 case OMPD_taskwait:
10431 case OMPD_taskgroup:
10432 case OMPD_atomic:
10433 case OMPD_flush:
10434 case OMPD_depobj:
10435 case OMPD_scan:
10436 case OMPD_teams:
10437 case OMPD_target_data:
10438 case OMPD_distribute:
10439 case OMPD_distribute_simd:
10440 case OMPD_distribute_parallel_for:
10441 case OMPD_distribute_parallel_for_simd:
10442 case OMPD_teams_distribute:
10443 case OMPD_teams_distribute_simd:
10444 case OMPD_teams_distribute_parallel_for:
10445 case OMPD_teams_distribute_parallel_for_simd:
10446 case OMPD_declare_simd:
10447 case OMPD_declare_variant:
10448 case OMPD_begin_declare_variant:
10449 case OMPD_end_declare_variant:
10450 case OMPD_declare_target:
10451 case OMPD_end_declare_target:
10452 case OMPD_declare_reduction:
10453 case OMPD_declare_mapper:
10454 case OMPD_taskloop:
10455 case OMPD_taskloop_simd:
10456 case OMPD_master_taskloop:
10457 case OMPD_master_taskloop_simd:
10458 case OMPD_parallel_master_taskloop:
10459 case OMPD_parallel_master_taskloop_simd:
10460 case OMPD_target:
10461 case OMPD_target_simd:
10462 case OMPD_target_teams_distribute:
10463 case OMPD_target_teams_distribute_simd:
10464 case OMPD_target_teams_distribute_parallel_for:
10465 case OMPD_target_teams_distribute_parallel_for_simd:
10466 case OMPD_target_teams:
10467 case OMPD_target_parallel:
10468 case OMPD_target_parallel_for:
10469 case OMPD_target_parallel_for_simd:
10470 case OMPD_requires:
10471 case OMPD_metadirective:
10472 case OMPD_unknown:
10473 default:
10474 llvm_unreachable("Unexpected standalone target data directive.");
10475 break;
10477 CGF.EmitRuntimeCall(
10478 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10479 OffloadingArgs);
10482 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10483 &MapNamesArray](CodeGenFunction &CGF,
10484 PrePostActionTy &) {
10485 // Fill up the arrays with all the mapped variables.
10486 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10488 // Get map clause information.
10489 MappableExprsHandler MEHandler(D, CGF);
10490 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10492 CGOpenMPRuntime::TargetDataInfo Info;
10493 // Fill up the arrays and create the arguments.
10494 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10495 /*IsNonContiguous=*/true);
10496 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10497 D.hasClausesOfKind<OMPNowaitClause>();
10498 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10499 llvm::codegenoptions::NoDebugInfo;
10500 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10501 EmitDebug,
10502 /*ForEndCall=*/false);
10503 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10504 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10505 CGF.VoidPtrTy, CGM.getPointerAlign());
10506 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10507 CGM.getPointerAlign());
10508 InputInfo.SizesArray =
10509 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10510 InputInfo.MappersArray =
10511 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10512 MapTypesArray = Info.RTArgs.MapTypesArray;
10513 MapNamesArray = Info.RTArgs.MapNamesArray;
10514 if (RequiresOuterTask)
10515 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10516 else
10517 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10520 if (IfCond) {
10521 emitIfClause(CGF, IfCond, TargetThenGen,
10522 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10523 } else {
10524 RegionCodeGenTy ThenRCG(TargetThenGen);
10525 ThenRCG(CGF);
10529 namespace {
10530 /// Kind of parameter in a function with 'declare simd' directive.
10531 enum ParamKindTy {
10532 Linear,
10533 LinearRef,
10534 LinearUVal,
10535 LinearVal,
10536 Uniform,
10537 Vector,
10538 };
10539 /// Attribute set of the parameter.
10540 struct ParamAttrTy {
10541 ParamKindTy Kind = Vector;
10542 llvm::APSInt StrideOrArg;
10543 llvm::APSInt Alignment;
10544 bool HasVarStride = false;
10545 };
10546 } // namespace
10548 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10549 ArrayRef<ParamAttrTy> ParamAttrs) {
10550   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10551   // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10552   // argument of that clause. The VLEN value must be a power of 2.
10553   // Otherwise, the notion of the function's "characteristic data type" (CDT)
10554   // is used to compute the vector length.
10555   // The CDT is defined in the following order:
10556   //   a) For a non-void function, the CDT is the return type.
10557   //   b) If the function has any non-uniform, non-linear parameters, then the
10558   //   CDT is the type of the first such parameter.
10559   //   c) If the CDT determined by a) or b) above is a struct, union, or class
10560   //   type which is passed by value (except for the type that maps to the
10561   //   built-in complex data type), the characteristic data type is int.
10562   //   d) If none of the above three cases is applicable, the CDT is int.
10563   // The VLEN is then determined based on the CDT and the size of the vector
10564   // register of the ISA for which the current vector version is generated. The
10565   // VLEN is computed using the formula below:
10566   //   VLEN = sizeof(vector_register) / sizeof(CDT),
10567   // where the vector register size is specified in section 3.2.1 "Registers
10568   // and the Stack Frame" of the original AMD64 ABI document.
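  // E.g. for 'double f(double x)' the CDT is 'double' (the return type), so
  // an AVX (256-bit) variant gets VLEN = 256 / 64 = 4.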
10569 QualType RetType = FD->getReturnType();
10570 if (RetType.isNull())
10571 return 0;
10572 ASTContext &C = FD->getASTContext();
10573 QualType CDT;
10574 if (!RetType.isNull() && !RetType->isVoidType()) {
10575 CDT = RetType;
10576 } else {
10577 unsigned Offset = 0;
10578 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10579 if (ParamAttrs[Offset].Kind == Vector)
10580 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10581 ++Offset;
10583 if (CDT.isNull()) {
10584 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10585 if (ParamAttrs[I + Offset].Kind == Vector) {
10586 CDT = FD->getParamDecl(I)->getType();
10587 break;
10592 if (CDT.isNull())
10593 CDT = C.IntTy;
10594 CDT = CDT->getCanonicalTypeUnqualified();
10595 if (CDT->isRecordType() || CDT->isUnionType())
10596 CDT = C.IntTy;
10597 return C.getTypeSize(CDT);
10600 /// Mangle the parameter part of the vector function name according to the
10601 /// parameters' OpenMP classification. The mangling function is defined in
10602 /// section 4.5 of the AAVFABI(2021Q1).
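/// E.g. 'void f(int *a, int b, float c)' with 'uniform(a) linear(b)'
/// mangles as "ulv": 'u' for the uniform parameter, 'l' for the linear one
/// (the default step of 1 is not printed), and 'v' for the vector one.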
10603 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10604 SmallString<256> Buffer;
10605 llvm::raw_svector_ostream Out(Buffer);
10606 for (const auto &ParamAttr : ParamAttrs) {
10607 switch (ParamAttr.Kind) {
10608 case Linear:
10609 Out << 'l';
10610 break;
10611 case LinearRef:
10612 Out << 'R';
10613 break;
10614 case LinearUVal:
10615 Out << 'U';
10616 break;
10617 case LinearVal:
10618 Out << 'L';
10619 break;
10620 case Uniform:
10621 Out << 'u';
10622 break;
10623 case Vector:
10624 Out << 'v';
10625 break;
10627 if (ParamAttr.HasVarStride)
10628 Out << "s" << ParamAttr.StrideOrArg;
10629 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10630 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10631 // Don't print the step value if it is not present or if it is
10632 // equal to 1.
10633 if (ParamAttr.StrideOrArg < 0)
10634 Out << 'n' << -ParamAttr.StrideOrArg;
10635 else if (ParamAttr.StrideOrArg != 1)
10636 Out << ParamAttr.StrideOrArg;
10639 if (!!ParamAttr.Alignment)
10640 Out << 'a' << ParamAttr.Alignment;
10643 return std::string(Out.str());
10646 static void
10647 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10648 const llvm::APSInt &VLENVal,
10649 ArrayRef<ParamAttrTy> ParamAttrs,
10650 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10651 struct ISADataTy {
10652 char ISA;
10653     unsigned VecRegSize;
10654   };
10655   ISADataTy ISAData[] = {
10656       {'b', 128}, // SSE
10659       {'c', 256}, // AVX
10662       {'d', 256}, // AVX2
10665       {'e', 512}, // AVX512
10668   };
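  // E.g. an unmasked SSE variant with VLEN 4 of a function 'foo' taking one
  // vector parameter is advertised as "_ZGVbN4v_foo" (illustrative name; the
  // actual VLEN comes from simdlen or the CDT).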
10669 llvm::SmallVector<char, 2> Masked;
10670 switch (State) {
10671 case OMPDeclareSimdDeclAttr::BS_Undefined:
10672 Masked.push_back('N');
10673 Masked.push_back('M');
10674 break;
10675 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10676 Masked.push_back('N');
10677 break;
10678 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10679 Masked.push_back('M');
10680 break;
10682 for (char Mask : Masked) {
10683 for (const ISADataTy &Data : ISAData) {
10684 SmallString<256> Buffer;
10685 llvm::raw_svector_ostream Out(Buffer);
10686 Out << "_ZGV" << Data.ISA << Mask;
10687 if (!VLENVal) {
10688 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10689 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10690 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10691 } else {
10692 Out << VLENVal;
10694 Out << mangleVectorParameters(ParamAttrs);
10695 Out << '_' << Fn->getName();
10696 Fn->addFnAttr(Out.str());
10701 // These are the functions needed to mangle the names of the
10702 // vector functions generated by the compiler, according to the rules
10703 // defined in the "Vector Function ABI specifications for AArch64",
10704 // available at
10705 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10707 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10708 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10709 QT = QT.getCanonicalType();
10711 if (QT->isVoidType())
10712 return false;
10714 if (Kind == ParamKindTy::Uniform)
10715 return false;
10717 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10718 return false;
10720 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10721 !QT->isReferenceType())
10722 return false;
10724 return true;
10727 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10728 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10729 QT = QT.getCanonicalType();
10730 unsigned Size = C.getTypeSize(QT);
10732   // Only scalars and complex types at most 16 bytes wide set PBV to true.
10733 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10734 return false;
10736 if (QT->isFloatingType())
10737 return true;
10739 if (QT->isIntegerType())
10740 return true;
10742 if (QT->isPointerType())
10743 return true;
10745 // TODO: Add support for complex types (section 3.1.2, item 2).
10747 return false;
10750 /// Computes the lane size (LS) of a return type or of an input parameter,
10751 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10752 /// TODO: Add support for references, section 3.2.1, item 1.
10753 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10754 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10755 QualType PTy = QT.getCanonicalType()->getPointeeType();
10756 if (getAArch64PBV(PTy, C))
10757 return C.getTypeSize(PTy);
10759 if (getAArch64PBV(QT, C))
10760 return C.getTypeSize(QT);
10762 return C.getTypeSize(C.getUIntPtrType());
10765 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10766 // signature of the scalar function, as defined in 3.2.2 of the
10767 // AAVFABI.
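// E.g. for 'double f(float x)' the lane sizes are {64, 32}, so NDS == 32
// and WDS == 64.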
10768 static std::tuple<unsigned, unsigned, bool>
10769 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10770 QualType RetType = FD->getReturnType().getCanonicalType();
10772 ASTContext &C = FD->getASTContext();
10774 bool OutputBecomesInput = false;
10776 llvm::SmallVector<unsigned, 8> Sizes;
10777 if (!RetType->isVoidType()) {
10778 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10779 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10780 OutputBecomesInput = true;
10782 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10783 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10784 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10787 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10788 // The LS of a function parameter / return value can only be a power
10789 // of 2, starting from 8 bits, up to 128.
10790 assert(llvm::all_of(Sizes,
10791 [](unsigned Size) {
10792 return Size == 8 || Size == 16 || Size == 32 ||
10793 Size == 64 || Size == 128;
10794 }) &&
10795 "Invalid size");
10797 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10798 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10799 OutputBecomesInput);
10802 // Function used to add the attribute. The parameter `VLEN` is
10803 // templated to allow the use of "x" when targeting scalable functions
10804 // for SVE.
10805 template <typename T>
10806 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10807 char ISA, StringRef ParSeq,
10808 StringRef MangledName, bool OutputBecomesInput,
10809 llvm::Function *Fn) {
10810 SmallString<256> Buffer;
10811 llvm::raw_svector_ostream Out(Buffer);
10812 Out << Prefix << ISA << LMask << VLEN;
10813 if (OutputBecomesInput)
10814 Out << "v";
10815 Out << ParSeq << "_" << MangledName;
10816 Fn->addFnAttr(Out.str());
10819 // Helper function to generate the Advanced SIMD names depending on
10820 // the value of the NDS when simdlen is not present.
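// E.g. NDS == 32 produces both a 64-bit (VLEN 2) and a 128-bit (VLEN 4)
// Advanced SIMD variant.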
10821 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10822 StringRef Prefix, char ISA,
10823 StringRef ParSeq, StringRef MangledName,
10824 bool OutputBecomesInput,
10825 llvm::Function *Fn) {
10826 switch (NDS) {
10827 case 8:
10828 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10829 OutputBecomesInput, Fn);
10830 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10831 OutputBecomesInput, Fn);
10832 break;
10833 case 16:
10834 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10835 OutputBecomesInput, Fn);
10836 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10837 OutputBecomesInput, Fn);
10838 break;
10839 case 32:
10840 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10841 OutputBecomesInput, Fn);
10842 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10843 OutputBecomesInput, Fn);
10844 break;
10845 case 64:
10846 case 128:
10847 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10848 OutputBecomesInput, Fn);
10849 break;
10850 default:
10851 llvm_unreachable("Scalar type is too wide.");
10855 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10856 static void emitAArch64DeclareSimdFunction(
10857 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10858 ArrayRef<ParamAttrTy> ParamAttrs,
10859 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10860 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10862 // Get basic data for building the vector signature.
10863 const auto Data = getNDSWDS(FD, ParamAttrs);
10864 const unsigned NDS = std::get<0>(Data);
10865 const unsigned WDS = std::get<1>(Data);
10866 const bool OutputBecomesInput = std::get<2>(Data);
10868 // Check the values provided via `simdlen` by the user.
10869 // 1. A `simdlen(1)` doesn't produce vector signatures,
10870 if (UserVLEN == 1) {
10871 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10872 DiagnosticsEngine::Warning,
10873 "The clause simdlen(1) has no effect when targeting aarch64.");
10874 CGM.getDiags().Report(SLoc, DiagID);
10875 return;
10878 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10879 // Advanced SIMD output.
10880 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10881 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10882 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10883 "power of 2 when targeting Advanced SIMD.");
10884 CGM.getDiags().Report(SLoc, DiagID);
10885 return;
10888   // 3. Section 3.4.1. SVE fixed length must obey the architectural
10889 // limits.
10890 if (ISA == 's' && UserVLEN != 0) {
10891 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10892 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10893 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10894 "lanes in the architectural constraints "
10895 "for SVE (min is 128-bit, max is "
10896 "2048-bit, by steps of 128-bit)");
10897 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10898 return;
10902 // Sort out parameter sequence.
10903 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10904 StringRef Prefix = "_ZGV";
10905 // Generate simdlen from user input (if any).
10906 if (UserVLEN) {
10907 if (ISA == 's') {
10908 // SVE generates only a masked function.
10909 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10910 OutputBecomesInput, Fn);
10911 } else {
10912 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10913 // Advanced SIMD generates one or two functions, depending on
10914 // the `[not]inbranch` clause.
10915 switch (State) {
10916 case OMPDeclareSimdDeclAttr::BS_Undefined:
10917 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10918 OutputBecomesInput, Fn);
10919 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10920 OutputBecomesInput, Fn);
10921 break;
10922 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10923 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10924 OutputBecomesInput, Fn);
10925 break;
10926 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10927 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10928 OutputBecomesInput, Fn);
10929 break;
10932 } else {
10933 // If no user simdlen is provided, follow the AAVFABI rules for
10934 // generating the vector length.
10935 if (ISA == 's') {
10936 // SVE, section 3.4.1, item 1.
10937 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10938 OutputBecomesInput, Fn);
10939 } else {
10940 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10941 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10942 // two vector names depending on the use of the clause
10943 // `[not]inbranch`.
10944 switch (State) {
10945 case OMPDeclareSimdDeclAttr::BS_Undefined:
10946 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10947 OutputBecomesInput, Fn);
10948 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10949 OutputBecomesInput, Fn);
10950 break;
10951 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10952 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10953 OutputBecomesInput, Fn);
10954 break;
10955 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10956 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10957 OutputBecomesInput, Fn);
10958 break;
10964 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10965 llvm::Function *Fn) {
10966 ASTContext &C = CGM.getContext();
10967 FD = FD->getMostRecentDecl();
10968 while (FD) {
10969 // Map params to their positions in function decl.
10970 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10971 if (isa<CXXMethodDecl>(FD))
10972 ParamPositions.try_emplace(FD, 0);
10973 unsigned ParamPos = ParamPositions.size();
10974 for (const ParmVarDecl *P : FD->parameters()) {
10975 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10976 ++ParamPos;
10978 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10979 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10980 // Mark uniform parameters.
10981 for (const Expr *E : Attr->uniforms()) {
10982 E = E->IgnoreParenImpCasts();
10983 unsigned Pos;
10984 if (isa<CXXThisExpr>(E)) {
10985 Pos = ParamPositions[FD];
10986 } else {
10987 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10988 ->getCanonicalDecl();
10989 auto It = ParamPositions.find(PVD);
10990 assert(It != ParamPositions.end() && "Function parameter not found");
10991 Pos = It->second;
10993 ParamAttrs[Pos].Kind = Uniform;
10995 // Get alignment info.
10996 auto *NI = Attr->alignments_begin();
10997 for (const Expr *E : Attr->aligneds()) {
10998 E = E->IgnoreParenImpCasts();
10999 unsigned Pos;
11000 QualType ParmTy;
11001 if (isa<CXXThisExpr>(E)) {
11002 Pos = ParamPositions[FD];
11003 ParmTy = E->getType();
11004 } else {
11005 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11006 ->getCanonicalDecl();
11007 auto It = ParamPositions.find(PVD);
11008 assert(It != ParamPositions.end() && "Function parameter not found");
11009 Pos = It->second;
11010 ParmTy = PVD->getType();
11012 ParamAttrs[Pos].Alignment =
11013 (*NI)
11014 ? (*NI)->EvaluateKnownConstInt(C)
11015 : llvm::APSInt::getUnsigned(
11016 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11017 .getQuantity());
11018 ++NI;
11020 // Mark linear parameters.
11021 auto *SI = Attr->steps_begin();
11022 auto *MI = Attr->modifiers_begin();
11023 for (const Expr *E : Attr->linears()) {
11024 E = E->IgnoreParenImpCasts();
11025 unsigned Pos;
11026 bool IsReferenceType = false;
11027 // Rescaling factor needed to compute the linear parameter
11028 // value in the mangled name.
11029 unsigned PtrRescalingFactor = 1;
11030 if (isa<CXXThisExpr>(E)) {
11031 Pos = ParamPositions[FD];
11032 auto *P = cast<PointerType>(E->getType());
11033 PtrRescalingFactor = CGM.getContext()
11034 .getTypeSizeInChars(P->getPointeeType())
11035 .getQuantity();
11036 } else {
11037 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11038 ->getCanonicalDecl();
11039 auto It = ParamPositions.find(PVD);
11040 assert(It != ParamPositions.end() && "Function parameter not found");
11041 Pos = It->second;
11042 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11043 PtrRescalingFactor = CGM.getContext()
11044 .getTypeSizeInChars(P->getPointeeType())
11045 .getQuantity();
11046 else if (PVD->getType()->isReferenceType()) {
11047 IsReferenceType = true;
11048 PtrRescalingFactor =
11049 CGM.getContext()
11050 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11051 .getQuantity();
11054 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11055 if (*MI == OMPC_LINEAR_ref)
11056 ParamAttr.Kind = LinearRef;
11057 else if (*MI == OMPC_LINEAR_uval)
11058 ParamAttr.Kind = LinearUVal;
11059 else if (IsReferenceType)
11060 ParamAttr.Kind = LinearVal;
11061 else
11062 ParamAttr.Kind = Linear;
11063         // Assuming a stride of 1 for `linear` without modifiers.
11064 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11065 if (*SI) {
11066 Expr::EvalResult Result;
11067 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11068 if (const auto *DRE =
11069 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11070 if (const auto *StridePVD =
11071 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11072 ParamAttr.HasVarStride = true;
11073 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11074 assert(It != ParamPositions.end() &&
11075 "Function parameter not found");
11076 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11079 } else {
11080 ParamAttr.StrideOrArg = Result.Val.getInt();
11083 // If we are using a linear clause on a pointer, we need to
11084 // rescale the value of linear_step with the byte size of the
11085 // pointee type.
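        // E.g. 'linear(p:2)' with 'double *p' is mangled with a step of
        // 2 * 8 == 16 bytes.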
11086 if (!ParamAttr.HasVarStride &&
11087 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11088 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11089 ++SI;
11090 ++MI;
11092 llvm::APSInt VLENVal;
11093 SourceLocation ExprLoc;
11094 const Expr *VLENExpr = Attr->getSimdlen();
11095 if (VLENExpr) {
11096 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11097 ExprLoc = VLENExpr->getExprLoc();
11099 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11100 if (CGM.getTriple().isX86()) {
11101 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11102 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11103 unsigned VLEN = VLENVal.getExtValue();
11104 StringRef MangledName = Fn->getName();
11105 if (CGM.getTarget().hasFeature("sve"))
11106 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11107 MangledName, 's', 128, Fn, ExprLoc);
11108 else if (CGM.getTarget().hasFeature("neon"))
11109 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11110 MangledName, 'n', 128, Fn, ExprLoc);
11113 FD = FD->getPreviousDecl();
11117 namespace {
11118 /// Cleanup action for doacross support.
11119 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11120 public:
11121 static const int DoacrossFinArgs = 2;
11123 private:
11124 llvm::FunctionCallee RTLFn;
11125 llvm::Value *Args[DoacrossFinArgs];
11127 public:
11128 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11129 ArrayRef<llvm::Value *> CallArgs)
11130 : RTLFn(RTLFn) {
11131 assert(CallArgs.size() == DoacrossFinArgs);
11132 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11134 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11135 if (!CGF.HaveInsertPoint())
11136 return;
11137 CGF.EmitRuntimeCall(RTLFn, Args);
11140 } // namespace
11142 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11143 const OMPLoopDirective &D,
11144 ArrayRef<Expr *> NumIterations) {
11145 if (!CGF.HaveInsertPoint())
11146 return;
11148 ASTContext &C = CGM.getContext();
11149 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11150 RecordDecl *RD;
11151 if (KmpDimTy.isNull()) {
11152 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11153 // kmp_int64 lo; // lower
11154 // kmp_int64 up; // upper
11155 // kmp_int64 st; // stride
11156 // };
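    // E.g. '#pragma omp for ordered(2)' yields a two-element dims array,
    // one kmp_dim entry per loop in the doacross nest.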
11157 RD = C.buildImplicitRecord("kmp_dim");
11158 RD->startDefinition();
11159 addFieldToRecordDecl(C, RD, Int64Ty);
11160 addFieldToRecordDecl(C, RD, Int64Ty);
11161 addFieldToRecordDecl(C, RD, Int64Ty);
11162 RD->completeDefinition();
11163 KmpDimTy = C.getRecordType(RD);
11164 } else {
11165 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11167 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11168 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11169 ArraySizeModifier::Normal, 0);
11171 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11172 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11173 enum { LowerFD = 0, UpperFD, StrideFD };
11174 // Fill dims with data.
11175 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11176 LValue DimsLVal = CGF.MakeAddrLValue(
11177 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11178 // dims.upper = num_iterations;
11179 LValue UpperLVal = CGF.EmitLValueForField(
11180 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11181 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11182 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11183 Int64Ty, NumIterations[I]->getExprLoc());
11184 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11185 // dims.stride = 1;
11186 LValue StrideLVal = CGF.EmitLValueForField(
11187 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11188 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11189 StrideLVal);
11192 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11193 // kmp_int32 num_dims, struct kmp_dim * dims);
11194 llvm::Value *Args[] = {
11195 emitUpdateLocation(CGF, D.getBeginLoc()),
11196 getThreadID(CGF, D.getBeginLoc()),
11197 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11198 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11199 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11200 CGM.VoidPtrTy)};
11202 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11203 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11204 CGF.EmitRuntimeCall(RTLFn, Args);
11205 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11206 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11207 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11208 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11209 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11210 llvm::ArrayRef(FiniArgs));
11213 template <typename T>
11214 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11215 const T *C, llvm::Value *ULoc,
11216 llvm::Value *ThreadID) {
11217 QualType Int64Ty =
11218 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11219 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11220 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11221 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11222 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11223 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11224 const Expr *CounterVal = C->getLoopData(I);
11225 assert(CounterVal);
11226 llvm::Value *CntVal = CGF.EmitScalarConversion(
11227 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11228 CounterVal->getExprLoc());
11229 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11230 /*Volatile=*/false, Int64Ty);
11232 llvm::Value *Args[] = {
11233 ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11234 llvm::FunctionCallee RTLFn;
11235 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11236 OMPDoacrossKind<T> ODK;
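  // A 'source' dependence posts the just-completed iteration via
  // __kmpc_doacross_post; a 'sink' dependence waits on the referenced
  // iterations via __kmpc_doacross_wait.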
11237 if (ODK.isSource(C)) {
11238 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11239 OMPRTL___kmpc_doacross_post);
11240 } else {
11241 assert(ODK.isSink(C) && "Expect sink modifier.");
11242 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11243 OMPRTL___kmpc_doacross_wait);
11245 CGF.EmitRuntimeCall(RTLFn, Args);
11248 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11249 const OMPDependClause *C) {
11250 return EmitDoacrossOrdered<OMPDependClause>(
11251 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11252 getThreadID(CGF, C->getBeginLoc()));
11255 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11256 const OMPDoacrossClause *C) {
11257 return EmitDoacrossOrdered<OMPDoacrossClause>(
11258 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11259 getThreadID(CGF, C->getBeginLoc()));
11262 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11263 llvm::FunctionCallee Callee,
11264 ArrayRef<llvm::Value *> Args) const {
11265 assert(Loc.isValid() && "Outlined function call location must be valid.");
11266 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11268 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11269 if (Fn->doesNotThrow()) {
11270 CGF.EmitNounwindRuntimeCall(Fn, Args);
11271 return;
11274 CGF.EmitRuntimeCall(Callee, Args);
11277 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11278 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11279 ArrayRef<llvm::Value *> Args) const {
11280 emitCall(CGF, Loc, OutlinedFn, Args);
11281 }
11283 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11284 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11285 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11286 HasEmittedDeclareTargetRegion = true;
11287 }
11289 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11290 const VarDecl *NativeParam,
11291 const VarDecl *TargetParam) const {
11292 return CGF.GetAddrOfLocalVar(NativeParam);
11293 }
11295 /// Return the allocator value from the expression, or a null allocator (the
11296 /// default when no allocator is specified).
11297 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11298 const Expr *Allocator) {
11299 llvm::Value *AllocVal;
11300 if (Allocator) {
11301 AllocVal = CGF.EmitScalarExpr(Allocator);
11302 // According to the standard, the original allocator type is an enum
11303 // (integer). Convert it to a pointer type, if required.
11304 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11305 CGF.getContext().VoidPtrTy,
11306 Allocator->getExprLoc());
11307 } else {
11308 // If no allocator specified, it defaults to the null allocator.
11309 AllocVal = llvm::Constant::getNullValue(
11310 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11311 }
11312 return AllocVal;
11313 }
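// Illustrative note: for a clause such as
//
//   #pragma omp task allocate(omp_high_bw_mem_alloc: x)
//
// the allocator expression evaluates to an omp_allocator_handle_t enumerator,
// and the scalar conversion above reinterprets that integer as the void*
// handle expected by the __kmpc_alloc family of entry points.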
11315 /// Return the alignment from an allocate directive if present.
11316 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11317 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11319 if (!AllocateAlignment)
11320 return nullptr;
11322 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11323 }
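// Illustrative note (align is an OpenMP 5.1 clause): for
//
//   #pragma omp allocate(buf) align(64)
//
// getOMPAllocateAlignment reports 64 bytes, which is materialized here as a
// size_t constant for __kmpc_aligned_alloc; without an align clause this
// returns nullptr and the plain __kmpc_alloc path is used instead.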
11325 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11326 const VarDecl *VD) {
11327 if (!VD)
11328 return Address::invalid();
11329 Address UntiedAddr = Address::invalid();
11330 Address UntiedRealAddr = Address::invalid();
11331 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11332 if (It != FunctionToUntiedTaskStackMap.end()) {
11333 const UntiedLocalVarsAddressesMap &UntiedData =
11334 UntiedLocalVarsStack[It->second];
11335 auto I = UntiedData.find(VD);
11336 if (I != UntiedData.end()) {
11337 UntiedAddr = I->second.first;
11338 UntiedRealAddr = I->second.second;
11339 }
11340 }
11341 const VarDecl *CVD = VD->getCanonicalDecl();
11342 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11343 // Use the default allocation.
11344 if (!isAllocatableDecl(VD))
11345 return UntiedAddr;
11346 llvm::Value *Size;
11347 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11348 if (CVD->getType()->isVariablyModifiedType()) {
11349 Size = CGF.getTypeSize(CVD->getType());
11350 // Align the size: ((size + align - 1) / align) * align
11351 Size = CGF.Builder.CreateNUWAdd(
11352 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11353 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11354 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11355 } else {
11356 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11357 Size = CGM.getSize(Sz.alignTo(Align));
11358 }
11359 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11360 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11361 const Expr *Allocator = AA->getAllocator();
11362 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11363 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11364 SmallVector<llvm::Value *, 4> Args;
11365 Args.push_back(ThreadID);
11366 if (Alignment)
11367 Args.push_back(Alignment);
11368 Args.push_back(Size);
11369 Args.push_back(AllocVal);
11370 llvm::omp::RuntimeFunction FnID =
11371 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11372 llvm::Value *Addr = CGF.EmitRuntimeCall(
11373 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11374 getName({CVD->getName(), ".void.addr"}));
11375 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11376 CGM.getModule(), OMPRTL___kmpc_free);
11377 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11378 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11379 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11380 if (UntiedAddr.isValid())
11381 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11383 // Cleanup action for allocate support.
11384 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11385 llvm::FunctionCallee RTLFn;
11386 SourceLocation::UIntTy LocEncoding;
11387 Address Addr;
11388 const Expr *AllocExpr;
11390 public:
11391 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11392 SourceLocation::UIntTy LocEncoding, Address Addr,
11393 const Expr *AllocExpr)
11394 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11395 AllocExpr(AllocExpr) {}
11396 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11397 if (!CGF.HaveInsertPoint())
11398 return;
11399 llvm::Value *Args[3];
11400 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11401 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11402 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11403 Addr.getPointer(), CGF.VoidPtrTy);
11404 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11405 Args[2] = AllocVal;
11406 CGF.EmitRuntimeCall(RTLFn, Args);
11407 }
11408 };
11409 Address VDAddr =
11410 UntiedRealAddr.isValid()
11411 ? UntiedRealAddr
11412 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11413 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11414 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11415 VDAddr, Allocator);
11416 if (UntiedRealAddr.isValid())
11417 if (auto *Region =
11418 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11419 Region->emitUntiedSwitch(CGF);
11420 return VDAddr;
11421 }
11422 return UntiedAddr;
11423 }
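// Rough sketch of the lowering above (names illustrative, not normative): for
//
//   int a;
//   #pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
//
// inside a function, the emitted IR is approximately
//
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
//   %v    = call ptr @__kmpc_alloc(i32 %gtid, i64 4, ptr %allocator)
//   ;; ... uses of %v as the address of 'a' ...
//   call void @__kmpc_free(i32 %gtid, ptr %v, ptr %allocator) ; via EH cleanup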
11425 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11426 const VarDecl *VD) const {
11427 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11428 if (It == FunctionToUntiedTaskStackMap.end())
11429 return false;
11430 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11431 }
11433 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11434 CodeGenModule &CGM, const OMPLoopDirective &S)
11435 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11436 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11437 if (!NeedToPush)
11438 return;
11439 NontemporalDeclsSet &DS =
11440 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11441 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11442 for (const Stmt *Ref : C->private_refs()) {
11443 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11444 const ValueDecl *VD;
11445 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11446 VD = DRE->getDecl();
11447 } else {
11448 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11449 assert((ME->isImplicitCXXThis() ||
11450 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11451 "Expected member of current class.");
11452 VD = ME->getMemberDecl();
11453 }
11454 DS.insert(VD);
11455 }
11456 }
11457 }
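// Illustrative note: this RAII is pushed while emitting directives such as
//
//   #pragma omp simd nontemporal(a, b)
//   for (int i = 0; i < n; ++i) a[i] = b[i] + 1.0;
//
// so loads and stores of 'a' and 'b' inside the region can be identified
// through isNontemporalDecl() and annotated with nontemporal metadata.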
11459 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11460 if (!NeedToPush)
11461 return;
11462 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11463 }
11465 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11466 CodeGenFunction &CGF,
11467 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11468 std::pair<Address, Address>> &LocalVars)
11469 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11470 if (!NeedToPush)
11471 return;
11472 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11473 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11474 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11475 }
11477 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11478 if (!NeedToPush)
11479 return;
11480 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11481 }
11483 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11484 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11486 return llvm::any_of(
11487 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11488 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11489 }
11491 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11492 const OMPExecutableDirective &S,
11493 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11494 const {
11495 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11496 // Vars in target/task regions must be excluded completely.
11497 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11498 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11499 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11500 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11501 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11502 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11503 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11504 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11505 }
11506 }
11507 // Exclude vars in private clauses.
11508 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11509 for (const Expr *Ref : C->varlists()) {
11510 if (!Ref->getType()->isScalarType())
11511 continue;
11512 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11513 if (!DRE)
11514 continue;
11515 NeedToCheckForLPCs.insert(DRE->getDecl());
11516 }
11517 }
11518 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11519 for (const Expr *Ref : C->varlists()) {
11520 if (!Ref->getType()->isScalarType())
11521 continue;
11522 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11523 if (!DRE)
11524 continue;
11525 NeedToCheckForLPCs.insert(DRE->getDecl());
11526 }
11527 }
11528 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11529 for (const Expr *Ref : C->varlists()) {
11530 if (!Ref->getType()->isScalarType())
11531 continue;
11532 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11533 if (!DRE)
11534 continue;
11535 NeedToCheckForLPCs.insert(DRE->getDecl());
11536 }
11537 }
11538 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11539 for (const Expr *Ref : C->varlists()) {
11540 if (!Ref->getType()->isScalarType())
11541 continue;
11542 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11543 if (!DRE)
11544 continue;
11545 NeedToCheckForLPCs.insert(DRE->getDecl());
11546 }
11547 }
11548 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11549 for (const Expr *Ref : C->varlists()) {
11550 if (!Ref->getType()->isScalarType())
11551 continue;
11552 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11553 if (!DRE)
11554 continue;
11555 NeedToCheckForLPCs.insert(DRE->getDecl());
11556 }
11557 }
11558 for (const Decl *VD : NeedToCheckForLPCs) {
11559 for (const LastprivateConditionalData &Data :
11560 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11561 if (Data.DeclToUniqueName.count(VD) > 0) {
11562 if (!Data.Disabled)
11563 NeedToAddForLPCsAsDisabled.insert(VD);
11564 break;
11565 }
11566 }
11567 }
11568 }
11570 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11571 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11572 : CGM(CGF.CGM),
11573 Action((CGM.getLangOpts().OpenMP >= 50 &&
11574 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11575 [](const OMPLastprivateClause *C) {
11576 return C->getKind() ==
11577 OMPC_LASTPRIVATE_conditional;
11578 }))
11579 ? ActionToDo::PushAsLastprivateConditional
11580 : ActionToDo::DoNotPush) {
11581 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11582 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11583 return;
11584 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11585 "Expected a push action.");
11586 LastprivateConditionalData &Data =
11587 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11588 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11589 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11590 continue;
11592 for (const Expr *Ref : C->varlists()) {
11593 Data.DeclToUniqueName.insert(std::make_pair(
11594 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11595 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11596 }
11597 }
11598 Data.IVLVal = IVLVal;
11599 Data.Fn = CGF.CurFn;
11600 }
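// Illustrative note: the push branch above is taken for OpenMP >= 5.0 code
// such as
//
//   #pragma omp parallel for lastprivate(conditional: x)
//   for (int i = 0; i < n; ++i)
//     if (p(i)) x = f(i);
//
// where 'x' must receive the value from the sequentially last iteration that
// actually assigned it, not simply from the last iteration executed.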
11602 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11603 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11604 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11605 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11606 if (CGM.getLangOpts().OpenMP < 50)
11607 return;
11608 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11609 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11610 if (!NeedToAddForLPCsAsDisabled.empty()) {
11611 Action = ActionToDo::DisableLastprivateConditional;
11612 LastprivateConditionalData &Data =
11613 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11614 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11615 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11616 Data.Fn = CGF.CurFn;
11617 Data.Disabled = true;
11618 }
11619 }
11621 CGOpenMPRuntime::LastprivateConditionalRAII
11622 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11623 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11624 return LastprivateConditionalRAII(CGF, S);
11625 }
11627 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11628 if (CGM.getLangOpts().OpenMP < 50)
11629 return;
11630 if (Action == ActionToDo::DisableLastprivateConditional) {
11631 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11632 "Expected list of disabled private vars.");
11633 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11634 }
11635 if (Action == ActionToDo::PushAsLastprivateConditional) {
11636 assert(
11637 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11638 "Expected list of lastprivate conditional vars.");
11639 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11640 }
11641 }
11643 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11644 const VarDecl *VD) {
11645 ASTContext &C = CGM.getContext();
11646 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11647 if (I == LastprivateConditionalToTypes.end())
11648 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11649 QualType NewType;
11650 const FieldDecl *VDField;
11651 const FieldDecl *FiredField;
11652 LValue BaseLVal;
11653 auto VI = I->getSecond().find(VD);
11654 if (VI == I->getSecond().end()) {
11655 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11656 RD->startDefinition();
11657 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11658 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11659 RD->completeDefinition();
11660 NewType = C.getRecordType(RD);
11661 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11662 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11663 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11664 } else {
11665 NewType = std::get<0>(VI->getSecond());
11666 VDField = std::get<1>(VI->getSecond());
11667 FiredField = std::get<2>(VI->getSecond());
11668 BaseLVal = std::get<3>(VI->getSecond());
11669 }
11670 LValue FiredLVal =
11671 CGF.EmitLValueForField(BaseLVal, FiredField);
11672 CGF.EmitStoreOfScalar(
11673 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11674 FiredLVal);
11675 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11676 }
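// Illustrative note: the implicit record built above is, in effect,
//
//   struct lastprivate.conditional { <decl-type> val; char Fired; };
//
// 'Fired' is zeroed here and set to 1 whenever the private copy is written
// (see checkAndEmitLastprivateConditional), so the final update can tell
// whether this thread's value is a candidate for the conditional assignment.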
11678 namespace {
11679 /// Checks if the lastprivate conditional variable is referenced in LHS.
11680 class LastprivateConditionalRefChecker final
11681 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11682 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11683 const Expr *FoundE = nullptr;
11684 const Decl *FoundD = nullptr;
11685 StringRef UniqueDeclName;
11686 LValue IVLVal;
11687 llvm::Function *FoundFn = nullptr;
11688 SourceLocation Loc;
11690 public:
11691 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11692 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11693 llvm::reverse(LPM)) {
11694 auto It = D.DeclToUniqueName.find(E->getDecl());
11695 if (It == D.DeclToUniqueName.end())
11696 continue;
11697 if (D.Disabled)
11698 return false;
11699 FoundE = E;
11700 FoundD = E->getDecl()->getCanonicalDecl();
11701 UniqueDeclName = It->second;
11702 IVLVal = D.IVLVal;
11703 FoundFn = D.Fn;
11704 break;
11705 }
11706 return FoundE == E;
11707 }
11708 bool VisitMemberExpr(const MemberExpr *E) {
11709 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11710 return false;
11711 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11712 llvm::reverse(LPM)) {
11713 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11714 if (It == D.DeclToUniqueName.end())
11715 continue;
11716 if (D.Disabled)
11717 return false;
11718 FoundE = E;
11719 FoundD = E->getMemberDecl()->getCanonicalDecl();
11720 UniqueDeclName = It->second;
11721 IVLVal = D.IVLVal;
11722 FoundFn = D.Fn;
11723 break;
11724 }
11725 return FoundE == E;
11726 }
11727 bool VisitStmt(const Stmt *S) {
11728 for (const Stmt *Child : S->children()) {
11729 if (!Child)
11730 continue;
11731 if (const auto *E = dyn_cast<Expr>(Child))
11732 if (!E->isGLValue())
11733 continue;
11734 if (Visit(Child))
11735 return true;
11736 }
11737 return false;
11738 }
11739 explicit LastprivateConditionalRefChecker(
11740 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11741 : LPM(LPM) {}
11742 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11743 getFoundData() const {
11744 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11745 }
11746 };
11747 } // namespace
11749 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11750 LValue IVLVal,
11751 StringRef UniqueDeclName,
11752 LValue LVal,
11753 SourceLocation Loc) {
11754 // Last updated loop counter for the lastprivate conditional var.
11755 // int<xx> last_iv = 0;
11756 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11757 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11758 LLIVTy, getName({UniqueDeclName, "iv"}));
11759 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11760 IVLVal.getAlignment().getAsAlign());
11761 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11763 // Last value of the lastprivate conditional.
11764 // decltype(priv_a) last_a;
11765 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11766 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11767 Last->setAlignment(LVal.getAlignment().getAsAlign());
11768 LValue LastLVal = CGF.MakeAddrLValue(
11769 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
11771 // Global loop counter. Required to handle inner parallel-for regions.
11772 // iv
11773 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11775 // #pragma omp critical(a)
11776 // if (last_iv <= iv) {
11777 // last_iv = iv;
11778 // last_a = priv_a;
11779 // }
11780 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11781 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11782 Action.Enter(CGF);
11783 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11784 // (last_iv <= iv) ? Check if the variable is updated and store new
11785 // value in global var.
11786 llvm::Value *CmpRes;
11787 if (IVLVal.getType()->isSignedIntegerType()) {
11788 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11789 } else {
11790 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11791 "Loop iteration variable must be integer.");
11792 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11793 }
11794 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11795 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11796 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11797 // {
11798 CGF.EmitBlock(ThenBB);
11800 // last_iv = iv;
11801 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11803 // last_a = priv_a;
11804 switch (CGF.getEvaluationKind(LVal.getType())) {
11805 case TEK_Scalar: {
11806 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11807 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11808 break;
11809 }
11810 case TEK_Complex: {
11811 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11812 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11813 break;
11814 }
11815 case TEK_Aggregate:
11816 llvm_unreachable(
11817 "Aggregates are not supported in lastprivate conditional.");
11819 // }
11820 CGF.EmitBranch(ExitBB);
11821 // There is no need to emit line number for unconditional branch.
11822 (void)ApplyDebugLocation::CreateEmpty(CGF);
11823 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11824 };
11826 if (CGM.getLangOpts().OpenMPSimd) {
11827 // Do not emit as a critical region as no parallel region could be emitted.
11828 RegionCodeGenTy ThenRCG(CodeGen);
11829 ThenRCG(CGF);
11830 } else {
11831 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11832 }
11833 }
11835 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11836 const Expr *LHS) {
11837 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11838 return;
11839 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11840 if (!Checker.Visit(LHS))
11841 return;
11842 const Expr *FoundE;
11843 const Decl *FoundD;
11844 StringRef UniqueDeclName;
11845 LValue IVLVal;
11846 llvm::Function *FoundFn;
11847 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11848 Checker.getFoundData();
11849 if (FoundFn != CGF.CurFn) {
11850 // Special codegen for inner parallel regions.
11851 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11852 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11853 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11854 "Lastprivate conditional is not found in outer region.");
11855 QualType StructTy = std::get<0>(It->getSecond());
11856 const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
11857 LValue PrivLVal = CGF.EmitLValue(FoundE);
11858 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11859 PrivLVal.getAddress(CGF),
11860 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11861 CGF.ConvertTypeForMem(StructTy));
11862 LValue BaseLVal =
11863 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11864 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11865 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11866 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11867 FiredLVal, llvm::AtomicOrdering::Unordered,
11868 /*IsVolatile=*/true, /*isInit=*/false);
11869 return;
11870 }
11872 // Private address of the lastprivate conditional in the current context.
11873 // priv_a
11874 LValue LVal = CGF.EmitLValue(FoundE);
11875 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11876 FoundE->getExprLoc());
11877 }
11879 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11880 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11881 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11882 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11883 return;
11884 auto Range = llvm::reverse(LastprivateConditionalStack);
11885 auto It = llvm::find_if(
11886 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11887 if (It == Range.end() || It->Fn != CGF.CurFn)
11888 return;
11889 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11890 assert(LPCI != LastprivateConditionalToTypes.end() &&
11891 "Lastprivates must be registered already.");
11892 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11893 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11894 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11895 for (const auto &Pair : It->DeclToUniqueName) {
11896 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11897 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11898 continue;
11899 auto I = LPCI->getSecond().find(Pair.first);
11900 assert(I != LPCI->getSecond().end() &&
11901 "Lastprivate must be rehistered already.");
11902 // bool Cmp = priv_a.Fired != 0;
11903 LValue BaseLVal = std::get<3>(I->getSecond());
11904 LValue FiredLVal =
11905 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11906 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11907 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11908 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11909 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11910 // if (Cmp) {
11911 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11912 CGF.EmitBlock(ThenBB);
11913 Address Addr = CGF.GetAddrOfLocalVar(VD);
11914 LValue LVal;
11915 if (VD->getType()->isReferenceType())
11916 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11917 AlignmentSource::Decl);
11918 else
11919 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11920 AlignmentSource::Decl);
11921 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11922 D.getBeginLoc());
11923 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11924 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
11925 // }
11926 }
11927 }
11929 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11930 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11931 SourceLocation Loc) {
11932 if (CGF.getLangOpts().OpenMP < 50)
11933 return;
11934 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11935 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11936 "Unknown lastprivate conditional variable.");
11937 StringRef UniqueName = It->second;
11938 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11939 // The variable was not updated in the region - exit.
11940 if (!GV)
11941 return;
11942 LValue LPLVal = CGF.MakeAddrLValue(
11943 Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
11944 PrivLVal.getType().getNonReferenceType());
11945 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11946 CGF.EmitStoreOfScalar(Res, PrivLVal);
11947 }
11949 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11950 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11951 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11952 const RegionCodeGenTy &CodeGen) {
11953 llvm_unreachable("Not supported in SIMD-only mode");
11954 }
11956 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11957 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11958 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11959 const RegionCodeGenTy &CodeGen) {
11960 llvm_unreachable("Not supported in SIMD-only mode");
11961 }
11963 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11964 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11965 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11966 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11967 bool Tied, unsigned &NumberOfParts) {
11968 llvm_unreachable("Not supported in SIMD-only mode");
11969 }
11971 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11972 SourceLocation Loc,
11973 llvm::Function *OutlinedFn,
11974 ArrayRef<llvm::Value *> CapturedVars,
11975 const Expr *IfCond,
11976 llvm::Value *NumThreads) {
11977 llvm_unreachable("Not supported in SIMD-only mode");
11978 }
11980 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11981 CodeGenFunction &CGF, StringRef CriticalName,
11982 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11983 const Expr *Hint) {
11984 llvm_unreachable("Not supported in SIMD-only mode");
11985 }
11987 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11988 const RegionCodeGenTy &MasterOpGen,
11989 SourceLocation Loc) {
11990 llvm_unreachable("Not supported in SIMD-only mode");
11991 }
11993 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
11994 const RegionCodeGenTy &MasterOpGen,
11995 SourceLocation Loc,
11996 const Expr *Filter) {
11997 llvm_unreachable("Not supported in SIMD-only mode");
11998 }
12000 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12001 SourceLocation Loc) {
12002 llvm_unreachable("Not supported in SIMD-only mode");
12003 }
12005 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12006 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12007 SourceLocation Loc) {
12008 llvm_unreachable("Not supported in SIMD-only mode");
12009 }
12011 void CGOpenMPSIMDRuntime::emitSingleRegion(
12012 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12013 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12014 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12015 ArrayRef<const Expr *> AssignmentOps) {
12016 llvm_unreachable("Not supported in SIMD-only mode");
12017 }
12019 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12020 const RegionCodeGenTy &OrderedOpGen,
12021 SourceLocation Loc,
12022 bool IsThreads) {
12023 llvm_unreachable("Not supported in SIMD-only mode");
12024 }
12026 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12027 SourceLocation Loc,
12028 OpenMPDirectiveKind Kind,
12029 bool EmitChecks,
12030 bool ForceSimpleCall) {
12031 llvm_unreachable("Not supported in SIMD-only mode");
12032 }
12034 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12035 CodeGenFunction &CGF, SourceLocation Loc,
12036 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12037 bool Ordered, const DispatchRTInput &DispatchValues) {
12038 llvm_unreachable("Not supported in SIMD-only mode");
12039 }
12041 void CGOpenMPSIMDRuntime::emitForStaticInit(
12042 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12043 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12044 llvm_unreachable("Not supported in SIMD-only mode");
12045 }
12047 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12048 CodeGenFunction &CGF, SourceLocation Loc,
12049 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12050 llvm_unreachable("Not supported in SIMD-only mode");
12051 }
12053 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12054 SourceLocation Loc,
12055 unsigned IVSize,
12056 bool IVSigned) {
12057 llvm_unreachable("Not supported in SIMD-only mode");
12058 }
12060 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12061 SourceLocation Loc,
12062 OpenMPDirectiveKind DKind) {
12063 llvm_unreachable("Not supported in SIMD-only mode");
12064 }
12066 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12067 SourceLocation Loc,
12068 unsigned IVSize, bool IVSigned,
12069 Address IL, Address LB,
12070 Address UB, Address ST) {
12071 llvm_unreachable("Not supported in SIMD-only mode");
12072 }
12074 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12075 llvm::Value *NumThreads,
12076 SourceLocation Loc) {
12077 llvm_unreachable("Not supported in SIMD-only mode");
12078 }
12080 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12081 ProcBindKind ProcBind,
12082 SourceLocation Loc) {
12083 llvm_unreachable("Not supported in SIMD-only mode");
12084 }
12086 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12087 const VarDecl *VD,
12088 Address VDAddr,
12089 SourceLocation Loc) {
12090 llvm_unreachable("Not supported in SIMD-only mode");
12091 }
12093 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12094 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12095 CodeGenFunction *CGF) {
12096 llvm_unreachable("Not supported in SIMD-only mode");
12097 }
12099 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12100 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12101 llvm_unreachable("Not supported in SIMD-only mode");
12102 }
12104 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12105 ArrayRef<const Expr *> Vars,
12106 SourceLocation Loc,
12107 llvm::AtomicOrdering AO) {
12108 llvm_unreachable("Not supported in SIMD-only mode");
12109 }
12111 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12112 const OMPExecutableDirective &D,
12113 llvm::Function *TaskFunction,
12114 QualType SharedsTy, Address Shareds,
12115 const Expr *IfCond,
12116 const OMPTaskDataTy &Data) {
12117 llvm_unreachable("Not supported in SIMD-only mode");
12118 }
12120 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12121 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12122 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12123 const Expr *IfCond, const OMPTaskDataTy &Data) {
12124 llvm_unreachable("Not supported in SIMD-only mode");
12125 }
12127 void CGOpenMPSIMDRuntime::emitReduction(
12128 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12129 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12130 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12131 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12132 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12133 ReductionOps, Options);
12134 }
12136 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12137 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12138 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12139 llvm_unreachable("Not supported in SIMD-only mode");
12140 }
12142 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12143 SourceLocation Loc,
12144 bool IsWorksharingReduction) {
12145 llvm_unreachable("Not supported in SIMD-only mode");
12146 }
12148 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12149 SourceLocation Loc,
12150 ReductionCodeGen &RCG,
12151 unsigned N) {
12152 llvm_unreachable("Not supported in SIMD-only mode");
12153 }
12155 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12156 SourceLocation Loc,
12157 llvm::Value *ReductionsPtr,
12158 LValue SharedLVal) {
12159 llvm_unreachable("Not supported in SIMD-only mode");
12160 }
12162 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12163 SourceLocation Loc,
12164 const OMPTaskDataTy &Data) {
12165 llvm_unreachable("Not supported in SIMD-only mode");
12166 }
12168 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12169 CodeGenFunction &CGF, SourceLocation Loc,
12170 OpenMPDirectiveKind CancelRegion) {
12171 llvm_unreachable("Not supported in SIMD-only mode");
12172 }
12174 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12175 SourceLocation Loc, const Expr *IfCond,
12176 OpenMPDirectiveKind CancelRegion) {
12177 llvm_unreachable("Not supported in SIMD-only mode");
12178 }
12180 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12181 const OMPExecutableDirective &D, StringRef ParentName,
12182 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12183 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12184 llvm_unreachable("Not supported in SIMD-only mode");
12185 }
12187 void CGOpenMPSIMDRuntime::emitTargetCall(
12188 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12189 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12190 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12191 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12192 const OMPLoopDirective &D)>
12193 SizeEmitter) {
12194 llvm_unreachable("Not supported in SIMD-only mode");
12195 }
12197 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12198 llvm_unreachable("Not supported in SIMD-only mode");
12199 }
12201 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12202 llvm_unreachable("Not supported in SIMD-only mode");
12203 }
12205 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12206 return false;
12207 }
12209 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12210 const OMPExecutableDirective &D,
12211 SourceLocation Loc,
12212 llvm::Function *OutlinedFn,
12213 ArrayRef<llvm::Value *> CapturedVars) {
12214 llvm_unreachable("Not supported in SIMD-only mode");
12215 }
12217 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12218 const Expr *NumTeams,
12219 const Expr *ThreadLimit,
12220 SourceLocation Loc) {
12221 llvm_unreachable("Not supported in SIMD-only mode");
12222 }
12224 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12225 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12226 const Expr *Device, const RegionCodeGenTy &CodeGen,
12227 CGOpenMPRuntime::TargetDataInfo &Info) {
12228 llvm_unreachable("Not supported in SIMD-only mode");
12229 }
12231 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12232 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12233 const Expr *Device) {
12234 llvm_unreachable("Not supported in SIMD-only mode");
12235 }
12237 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12238 const OMPLoopDirective &D,
12239 ArrayRef<Expr *> NumIterations) {
12240 llvm_unreachable("Not supported in SIMD-only mode");
12241 }
12243 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12244 const OMPDependClause *C) {
12245 llvm_unreachable("Not supported in SIMD-only mode");
12246 }
12248 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12249 const OMPDoacrossClause *C) {
12250 llvm_unreachable("Not supported in SIMD-only mode");
12251 }
12253 const VarDecl *
12254 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12255 const VarDecl *NativeParam) const {
12256 llvm_unreachable("Not supported in SIMD-only mode");
12257 }
12259 Address
12260 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12261 const VarDecl *NativeParam,
12262 const VarDecl *TargetParam) const {
12263 llvm_unreachable("Not supported in SIMD-only mode");
12264 }