//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
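
  // Illustrative sketch (not emitted verbatim): for an untied task with two
  // switching points, the machinery above produces a dispatch of the form
  //
  //   %partid = load i32, ptr %part_id.addr
  //   switch i32 %partid, label %.untied.done. [
  //     i32 0, label %.untied.jmp.     ; initial entry into the task body
  //     i32 1, label %.untied.jmp.1    ; resume point after first re-enqueue
  //   ]
  //
  // Each emitUntiedSwitch() call stores the next part id, re-enqueues the task
  // via UntiedCodeGen (a __kmpc_omp_task call), and opens a new case block so
  // the body can be re-entered where it previously yielded. Block names here
  // are illustrative; the real IR uses the ".untied.jmp." labels created above
  // with LLVM's automatic suffix numbering.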
197 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198 const VarDecl *ThreadIDVar,
199 const RegionCodeGenTy &CodeGen,
200 OpenMPDirectiveKind Kind, bool HasCancel,
201 const UntiedTaskActionTy &Action)
202 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203 ThreadIDVar(ThreadIDVar), Action(Action) {
204 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 /// Get a variable or parameter for storing global thread id
208 /// inside OpenMP construct.
209 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
211 /// Get an LValue for the current ThreadID variable.
212 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
214 /// Get the name of the capture helper.
215 StringRef getHelperName() const override { return ".omp_outlined."; }
217 void emitUntiedSwitch(CodeGenFunction &CGF) override {
218 Action.emitUntiedSwitch(CGF);
221 static bool classof(const CGCapturedStmtInfo *Info) {
222 return CGOpenMPRegionInfo::classof(Info) &&
223 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224 TaskOutlinedRegion;
227 private:
228 /// A variable or parameter storing global thread id for OpenMP
229 /// constructs.
230 const VarDecl *ThreadIDVar;
231 /// Action for emitting code for untied tasks.
232 const UntiedTaskActionTy &Action;
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
239 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240 const RegionCodeGenTy &CodeGen,
241 OpenMPDirectiveKind Kind, bool HasCancel)
242 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243 OldCSI(OldCSI),
244 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
246 // Retrieve the value of the context parameter.
247 llvm::Value *getContextValue() const override {
248 if (OuterRegionInfo)
249 return OuterRegionInfo->getContextValue();
250 llvm_unreachable("No context value for inlined OpenMP region");
253 void setContextValue(llvm::Value *V) override {
254 if (OuterRegionInfo) {
255 OuterRegionInfo->setContextValue(V);
256 return;
258 llvm_unreachable("No context value for inlined OpenMP region");
261 /// Lookup the captured field decl for a variable.
262 const FieldDecl *lookup(const VarDecl *VD) const override {
263 if (OuterRegionInfo)
264 return OuterRegionInfo->lookup(VD);
265 // If there is no outer outlined region,no need to lookup in a list of
266 // captured variables, we can use the original one.
267 return nullptr;
270 FieldDecl *getThisFieldDecl() const override {
271 if (OuterRegionInfo)
272 return OuterRegionInfo->getThisFieldDecl();
273 return nullptr;
276 /// Get a variable or parameter for storing global thread id
277 /// inside OpenMP construct.
278 const VarDecl *getThreadIDVariable() const override {
279 if (OuterRegionInfo)
280 return OuterRegionInfo->getThreadIDVariable();
281 return nullptr;
284 /// Get an LValue for the current ThreadID variable.
285 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286 if (OuterRegionInfo)
287 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288 llvm_unreachable("No LValue for inlined OpenMP construct");
291 /// Get the name of the capture helper.
292 StringRef getHelperName() const override {
293 if (auto *OuterRegionInfo = getOldCSI())
294 return OuterRegionInfo->getHelperName();
295 llvm_unreachable("No helper name for inlined OpenMP construct");
298 void emitUntiedSwitch(CodeGenFunction &CGF) override {
299 if (OuterRegionInfo)
300 OuterRegionInfo->emitUntiedSwitch(CGF);
303 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
305 static bool classof(const CGCapturedStmtInfo *Info) {
306 return CGOpenMPRegionInfo::classof(Info) &&
307 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
310 ~CGOpenMPInlinedRegionInfo() override = default;
312 private:
313 /// CodeGen info about outer OpenMP region.
314 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315 CGOpenMPRegionInfo *OuterRegionInfo;
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
325 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326 const RegionCodeGenTy &CodeGen, StringRef HelperName)
327 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328 /*HasCancel=*/false),
329 HelperName(HelperName) {}
331 /// This is unused for target regions because each starts executing
332 /// with a single thread.
333 const VarDecl *getThreadIDVariable() const override { return nullptr; }
335 /// Get the name of the capture helper.
336 StringRef getHelperName() const override { return HelperName; }
338 static bool classof(const CGCapturedStmtInfo *Info) {
339 return CGOpenMPRegionInfo::classof(Info) &&
340 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
343 private:
344 StringRef HelperName;
347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348 llvm_unreachable("No codegen for expressions");
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
354 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356 OMPD_unknown,
357 /*HasCancel=*/false),
358 PrivScope(CGF) {
359 // Make sure the globals captured in the provided statement are local by
360 // using the privatization logic. We assume the same variable is not
361 // captured more than once.
362 for (const auto &C : CS.captures()) {
363 if (!C.capturesVariable() && !C.capturesVariableByCopy())
364 continue;
366 const VarDecl *VD = C.getCapturedVar();
367 if (VD->isLocalVarDeclOrParm())
368 continue;
370 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371 /*RefersToEnclosingVariableOrCapture=*/false,
372 VD->getType().getNonReferenceType(), VK_LValue,
373 C.getLocation());
374 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
376 (void)PrivScope.Privatize();
379 /// Lookup the captured field decl for a variable.
380 const FieldDecl *lookup(const VarDecl *VD) const override {
381 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382 return FD;
383 return nullptr;
386 /// Emit the captured statement body.
387 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388 llvm_unreachable("No body for expressions");
391 /// Get a variable or parameter for storing global thread id
392 /// inside OpenMP construct.
393 const VarDecl *getThreadIDVariable() const override {
394 llvm_unreachable("No thread id for expressions");
397 /// Get the name of the capture helper.
398 StringRef getHelperName() const override {
399 llvm_unreachable("No helper name for expressions");
402 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
404 private:
405 /// Private scope to capture global variables.
406 CodeGenFunction::OMPPrivateScope PrivScope;
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411 CodeGenFunction &CGF;
412 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
413 FieldDecl *LambdaThisCaptureField = nullptr;
414 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415 bool NoInheritance = false;
417 public:
418 /// Constructs region for combined constructs.
419 /// \param CodeGen Code generation sequence for combined directives. Includes
420 /// a list of functions used for code generation of implicitly inlined
421 /// regions.
422 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423 OpenMPDirectiveKind Kind, bool HasCancel,
424 bool NoInheritance = true)
425 : CGF(CGF), NoInheritance(NoInheritance) {
426 // Start emission for the construct.
427 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429 if (NoInheritance) {
430 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432 CGF.LambdaThisCaptureField = nullptr;
433 BlockInfo = CGF.BlockInfo;
434 CGF.BlockInfo = nullptr;
438 ~InlinedOpenMPRegionRAII() {
439 // Restore original CapturedStmtInfo only if we're done with code emission.
440 auto *OldCSI =
441 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442 delete CGF.CapturedStmtInfo;
443 CGF.CapturedStmtInfo = OldCSI;
444 if (NoInheritance) {
445 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447 CGF.BlockInfo = BlockInfo;
452 /// Values for bit flags used in the ident_t to describe the fields.
453 /// All enumeric elements are named and described in accordance with the code
454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
455 enum OpenMPLocationFlags : unsigned {
456 /// Use trampoline for internal microtask.
457 OMP_IDENT_IMD = 0x01,
458 /// Use c-style ident structure.
459 OMP_IDENT_KMPC = 0x02,
460 /// Atomic reduction option for kmpc_reduce.
461 OMP_ATOMIC_REDUCE = 0x10,
462 /// Explicit 'barrier' directive.
463 OMP_IDENT_BARRIER_EXPL = 0x20,
464 /// Implicit barrier in code.
465 OMP_IDENT_BARRIER_IMPL = 0x40,
466 /// Implicit barrier in 'for' directive.
467 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
468 /// Implicit barrier in 'sections' directive.
469 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
470 /// Implicit barrier in 'single' directive.
471 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
472 /// Call of __kmp_for_static_init for static loop.
473 OMP_IDENT_WORK_LOOP = 0x200,
474 /// Call of __kmp_for_static_init for sections.
475 OMP_IDENT_WORK_SECTIONS = 0x400,
476 /// Call of __kmp_for_static_init for distribute.
477 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
478 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
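
// For orientation (values mirror kmp.h): the implicit barrier emitted at the
// end of a worksharing 'for' is described with OMP_IDENT_BARRIER_IMPL_FOR
// (0x40), while an explicit '#pragma omp barrier' uses OMP_IDENT_BARRIER_EXPL
// (0x20). Callers combine these flags and pass them as the Flags argument of
// emitUpdateLocation() defined later in this file.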

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
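//
// As a concrete illustration, the psource string produced later in this file
// by getIdentStringFromSourceLocation() looks like
//
//   ";/path/to/file.c;foo;12;3;;"
//
// i.e. ";file;function;line;column;;", with ";unknown;unknown;0;0;;" used as
// the default when no location or debug info is available.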
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
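
// A sketch of the mapping from schedule clauses to these enumerators (not an
// exhaustive table; see getRuntimeSchedule() for the authoritative logic):
//
//   schedule(static)        -> OMP_sch_static          (34)
//   schedule(static, N)     -> OMP_sch_static_chunked  (33)
//   schedule(dynamic, N)    -> OMP_sch_dynamic_chunked (35)
//   schedule(guided, N)     -> OMP_sch_guided_chunked  (36)
//   ordered + schedule(...) -> the corresponding OMP_ord_* value
//
// The modifier bits are OR'ed in, e.g. schedule(nonmonotonic: dynamic) yields
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic.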

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
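
// For reference, a user-defined reduction that exercises both paths above
// (illustrative OpenMP source, not part of this file):
//
//   #pragma omp declare reduction(merge : std::vector<int> :
//       omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end()))
//       initializer(omp_priv = omp_orig)
//
// The combiner expression is written in terms of omp_in/omp_out, and the
// initializer clause (if present) in terms of omp_priv/omp_orig; InitOp above
// is the call that applies that initializer to the private copy.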

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}
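
// As an illustration, assuming the default host separators (both "."), a call
// such as getName({"omp_combiner", ""}) produces ".omp_combiner." (the empty
// trailing part yields a trailing separator), and getName({"init"}) produces
// ".init". Device variants of the runtime may configure different separators
// through the constructor above.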

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
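
// For orientation (illustrative IR, names simplified): for a directive like
// '#pragma omp parallel', the helper generated here has the usual kmpc
// microtask shape, e.g.
//
//   define internal void @.omp_outlined.(ptr noalias %.global_tid.,
//                                        ptr noalias %.bound_tid., ...) { ... }
//
// and is later passed to the runtime's fork call (see emitParallelCall()). The
// trailing argument list depends on the variables captured by the region.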

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
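
// E.g., a location at line 12, column 3 of f.c inside function foo yields the
// string ";f.c;foo;12;3;;"; the function name is omitted when CurFuncDecl is
// not a FunctionDecl, leaving ";f.c;;12;3;;".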
1370 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1371 SourceLocation Loc,
1372 unsigned Flags) {
1373 uint32_t SrcLocStrSize;
1374 llvm::Constant *SrcLocStr;
1375 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1376 Loc.isInvalid()) {
1377 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1378 } else {
1379 std::string FunctionName;
1380 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1381 FunctionName = FD->getQualifiedNameAsString();
1382 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1383 const char *FileName = PLoc.getFilename();
1384 unsigned Line = PLoc.getLine();
1385 unsigned Column = PLoc.getColumn();
1386 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1387 Column, SrcLocStrSize);
1389 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1390 return OMPBuilder.getOrCreateIdent(
1391 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1394 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1395 SourceLocation Loc) {
1396 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1397 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1398 // the clang invariants used below might be broken.
1399 if (CGM.getLangOpts().OpenMPIRBuilder) {
1400 SmallString<128> Buffer;
1401 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1402 uint32_t SrcLocStrSize;
1403 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1404 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1405 return OMPBuilder.getOrCreateThreadID(
1406 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1409 llvm::Value *ThreadID = nullptr;
1410 // Check whether we've already cached a load of the thread id in this
1411 // function.
1412 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1413 if (I != OpenMPLocThreadIDMap.end()) {
1414 ThreadID = I->second.ThreadID;
1415 if (ThreadID != nullptr)
1416 return ThreadID;
1418 // If exceptions are enabled, do not use parameter to avoid possible crash.
1419 if (auto *OMPRegionInfo =
1420 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1421 if (OMPRegionInfo->getThreadIDVariable()) {
1422 // Check if this an outlined function with thread id passed as argument.
1423 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1424 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1425 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1426 !CGF.getLangOpts().CXXExceptions ||
1427 CGF.Builder.GetInsertBlock() == TopBlock ||
1428 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1429 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1430 TopBlock ||
1431 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1432 CGF.Builder.GetInsertBlock()) {
1433 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1434 // If value loaded in entry block, cache it and use it everywhere in
1435 // function.
1436 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1437 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1438 Elem.second.ThreadID = ThreadID;
1440 return ThreadID;
1445 // This is not an outlined function region - need to call __kmpc_int32
1446 // kmpc_global_thread_num(ident_t *loc).
1447 // Generate thread id value and cache this value for use across the
1448 // function.
1449 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1450 if (!Elem.second.ServiceInsertPt)
1451 setLocThreadIdInsertPt(CGF);
1452 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1453 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1454 llvm::CallInst *Call = CGF.Builder.CreateCall(
1455 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1456 OMPRTL___kmpc_global_thread_num),
1457 emitUpdateLocation(CGF, Loc));
1458 Call->setCallingConv(CGF.getRuntimeCC());
1459 Elem.second.ThreadID = Call;
1460 return Call;
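// Example (sketch): the first getThreadID query in a function emits, at the
// service insertion point near the function entry, something like
//   %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
// and caches the result in OpenMPLocThreadIDMap; later queries in the same
// function reuse the cached value instead of emitting another runtime call.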
1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1464 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1466 clearLocThreadIdInsertPt(CGF);
1467 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1469 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471 UDRMap.erase(D);
1472 FunctionUDRMap.erase(CGF.CurFn);
1474 auto I = FunctionUDMMap.find(CGF.CurFn);
1475 if (I != FunctionUDMMap.end()) {
1476 for(const auto *D : I->second)
1477 UDMMap.erase(D);
1478 FunctionUDMMap.erase(I);
1480 LastprivateConditionalToTypes.erase(CGF.CurFn);
1481 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1484 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1485 return OMPBuilder.IdentPtr;
1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1489 if (!Kmpc_MicroTy) {
1490 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1495 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1498 llvm::FunctionCallee
1499 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1500 bool IsGPUDistribute) {
1501 assert((IVSize == 32 || IVSize == 64) &&
1502 "IV size is not compatible with the omp runtime");
1503 StringRef Name;
1504 if (IsGPUDistribute)
1505 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1506 : "__kmpc_distribute_static_init_4u")
1507 : (IVSigned ? "__kmpc_distribute_static_init_8"
1508 : "__kmpc_distribute_static_init_8u");
1509 else
1510 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1511 : "__kmpc_for_static_init_4u")
1512 : (IVSigned ? "__kmpc_for_static_init_8"
1513 : "__kmpc_for_static_init_8u");
1515 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1516 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1517 llvm::Type *TypeParams[] = {
1518 getIdentTyPointerTy(), // loc
1519 CGM.Int32Ty, // tid
1520 CGM.Int32Ty, // schedtype
1521 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1522 PtrTy, // p_lower
1523 PtrTy, // p_upper
1524 PtrTy, // p_stride
1525 ITy, // incr
1526 ITy // chunk
1528 auto *FnTy =
1529 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1530 return CGM.CreateRuntimeFunction(FnTy, Name);
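// Example (sketch): IVSize and IVSigned select the runtime entry point by
// name, e.g. a signed 64-bit IV yields "__kmpc_for_static_init_8", and the
// same IV with IsGPUDistribute set yields "__kmpc_distribute_static_init_8".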
1533 llvm::FunctionCallee
1534 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1535 assert((IVSize == 32 || IVSize == 64) &&
1536 "IV size is not compatible with the omp runtime");
1537 StringRef Name =
1538 IVSize == 32
1539 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1540 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1541 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1542 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1543 CGM.Int32Ty, // tid
1544 CGM.Int32Ty, // schedtype
1545 ITy, // lower
1546 ITy, // upper
1547 ITy, // stride
1548 ITy // chunk
1550 auto *FnTy =
1551 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1552 return CGM.CreateRuntimeFunction(FnTy, Name);
1555 llvm::FunctionCallee
1556 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1557 assert((IVSize == 32 || IVSize == 64) &&
1558 "IV size is not compatible with the omp runtime");
1559 StringRef Name =
1560 IVSize == 32
1561 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1562 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1563 llvm::Type *TypeParams[] = {
1564 getIdentTyPointerTy(), // loc
1565 CGM.Int32Ty, // tid
1567 auto *FnTy =
1568 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1569 return CGM.CreateRuntimeFunction(FnTy, Name);
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1574 assert((IVSize == 32 || IVSize == 64) &&
1575 "IV size is not compatible with the omp runtime");
1576 StringRef Name =
1577 IVSize == 32
1578 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1579 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1580 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582 llvm::Type *TypeParams[] = {
1583 getIdentTyPointerTy(), // loc
1584 CGM.Int32Ty, // tid
1585 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586 PtrTy, // p_lower
1587 PtrTy, // p_upper
1588 PtrTy // p_stride
1590 auto *FnTy =
1591 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1592 return CGM.CreateRuntimeFunction(FnTy, Name);
1595 /// Obtain information that uniquely identifies a target entry. This
1596 /// consists of the file and device IDs as well as the line number associated
1597 /// with the relevant entry source location.
1598 static llvm::TargetRegionEntryInfo
1599 getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1600 StringRef ParentName = "") {
1601 SourceManager &SM = C.getSourceManager();
1603 // The loc should always be valid and have a file ID (the user cannot use
1604 // #pragma directives in macros)
1606 assert(Loc.isValid() && "Source location is expected to be always valid.");
1608 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1609 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1611 llvm::sys::fs::UniqueID ID;
1612 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1613 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1614 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1615 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1616 SM.getDiagnostics().Report(diag::err_cannot_open_file)
1617 << PLoc.getFilename() << EC.message();
1620 return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
1621 PLoc.getLine());
1624 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1625 if (CGM.getLangOpts().OpenMPSimd)
1626 return Address::invalid();
1627 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1628 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1629 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1630 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1631 HasRequiresUnifiedSharedMemory))) {
1632 SmallString<64> PtrName;
1634 llvm::raw_svector_ostream OS(PtrName);
1635 OS << CGM.getMangledName(GlobalDecl(VD));
1636 if (!VD->isExternallyVisible()) {
1637 auto EntryInfo = getTargetEntryUniqueInfo(
1638 CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc());
1639 OS << llvm::format("_%x", EntryInfo.FileID);
1641 OS << "_decl_tgt_ref_ptr";
1643 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1644 QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1645 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
1646 if (!Ptr) {
1647 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
1649 auto *GV = cast<llvm::GlobalVariable>(Ptr);
1650 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1652 if (!CGM.getLangOpts().OpenMPIsDevice)
1653 GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1654 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1656 return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1658 return Address::invalid();
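// Example (sketch, details may vary): for
//   int x;
//   #pragma omp declare target link(x)
// a weak pointer named roughly "x_decl_tgt_ref_ptr" (with a file-ID infix for
// internal symbols) is created on both host and device; on the host it is
// initialized with the address of x, and device accesses go through it.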
1661 llvm::Constant *
1662 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1663 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1664 !CGM.getContext().getTargetInfo().isTLSSupported());
1665 // Lookup the entry, lazily creating it if necessary.
1666 std::string Suffix = getName({"cache", ""});
1667 return getOrCreateInternalVariable(
1668 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1671 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1672 const VarDecl *VD,
1673 Address VDAddr,
1674 SourceLocation Loc) {
1675 if (CGM.getLangOpts().OpenMPUseTLS &&
1676 CGM.getContext().getTargetInfo().isTLSSupported())
1677 return VDAddr;
1679 llvm::Type *VarTy = VDAddr.getElementType();
1680 llvm::Value *Args[] = {
1681 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1682 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1683 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1684 getOrCreateThreadPrivateCache(VD)};
1685 return Address(
1686 CGF.EmitRuntimeCall(
1687 OMPBuilder.getOrCreateRuntimeFunction(
1688 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1689 Args),
1690 CGF.Int8Ty, VDAddr.getAlignment());
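// Example (sketch): when TLS is not used, a reference to
//   int tp;
//   #pragma omp threadprivate(tp)
// becomes, roughly,
//   __kmpc_threadprivate_cached(&loc, gtid, &tp, sizeof(tp), &cache)
// whose result is the address of the calling thread's private copy; 'cache' is
// the internal variable built by getOrCreateThreadPrivateCache from the
// mangled name plus a "cache" suffix.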
1693 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1694 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1695 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1696 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1697 // library.
1698 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1699 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1700 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1701 OMPLoc);
1702 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1703 // to register constructor/destructor for variable.
1704 llvm::Value *Args[] = {
1705 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1706 Ctor, CopyCtor, Dtor};
1707 CGF.EmitRuntimeCall(
1708 OMPBuilder.getOrCreateRuntimeFunction(
1709 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1710 Args);
1713 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1714 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1715 bool PerformInit, CodeGenFunction *CGF) {
1716 if (CGM.getLangOpts().OpenMPUseTLS &&
1717 CGM.getContext().getTargetInfo().isTLSSupported())
1718 return nullptr;
1720 VD = VD->getDefinition(CGM.getContext());
1721 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1722 QualType ASTTy = VD->getType();
1724 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1725 const Expr *Init = VD->getAnyInitializer();
1726 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1727 // Generate function that re-emits the declaration's initializer into the
1728 // threadprivate copy of the variable VD
1729 CodeGenFunction CtorCGF(CGM);
1730 FunctionArgList Args;
1731 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1732 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1733 ImplicitParamDecl::Other);
1734 Args.push_back(&Dst);
1736 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1737 CGM.getContext().VoidPtrTy, Args);
1738 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1739 std::string Name = getName({"__kmpc_global_ctor_", ""});
1740 llvm::Function *Fn =
1741 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1742 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1743 Args, Loc, Loc);
1744 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1745 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1746 CGM.getContext().VoidPtrTy, Dst.getLocation());
1747 Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
1748 Arg = CtorCGF.Builder.CreateElementBitCast(
1749 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1750 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1751 /*IsInitializer=*/true);
1752 ArgVal = CtorCGF.EmitLoadOfScalar(
1753 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1754 CGM.getContext().VoidPtrTy, Dst.getLocation());
1755 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1756 CtorCGF.FinishFunction();
1757 Ctor = Fn;
1759 if (VD->getType().isDestructedType() != QualType::DK_none) {
1760 // Generate function that emits destructor call for the threadprivate copy
1761 // of the variable VD
1762 CodeGenFunction DtorCGF(CGM);
1763 FunctionArgList Args;
1764 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1765 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1766 ImplicitParamDecl::Other);
1767 Args.push_back(&Dst);
1769 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1770 CGM.getContext().VoidTy, Args);
1771 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1772 std::string Name = getName({"__kmpc_global_dtor_", ""});
1773 llvm::Function *Fn =
1774 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1775 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1776 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1777 Loc, Loc);
1778 // Create a scope with an artificial location for the body of this function.
1779 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1780 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1781 DtorCGF.GetAddrOfLocalVar(&Dst),
1782 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1783 DtorCGF.emitDestroy(
1784 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1785 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1786 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1787 DtorCGF.FinishFunction();
1788 Dtor = Fn;
1790 // Do not emit init function if it is not required.
1791 if (!Ctor && !Dtor)
1792 return nullptr;
1794 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1795 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1796 /*isVarArg=*/false)
1797 ->getPointerTo();
1798 // Copying constructor for the threadprivate variable.
1799 // Must be NULL - reserved by the runtime, which currently requires that
1800 // this parameter always be NULL; otherwise it fires an assertion.
1801 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1802 if (Ctor == nullptr) {
1803 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1804 /*isVarArg=*/false)
1805 ->getPointerTo();
1806 Ctor = llvm::Constant::getNullValue(CtorTy);
1808 if (Dtor == nullptr) {
1809 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1810 /*isVarArg=*/false)
1811 ->getPointerTo();
1812 Dtor = llvm::Constant::getNullValue(DtorTy);
1814 if (!CGF) {
1815 auto *InitFunctionTy =
1816 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1817 std::string Name = getName({"__omp_threadprivate_init_", ""});
1818 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1819 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1820 CodeGenFunction InitCGF(CGM);
1821 FunctionArgList ArgList;
1822 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1823 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1824 Loc, Loc);
1825 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1826 InitCGF.FinishFunction();
1827 return InitFunction;
1829 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1831 return nullptr;
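// Example (sketch): for a C++ threadprivate variable with a non-trivial
// constructor and destructor, this synthesizes helpers named from
// "__kmpc_global_ctor_" and "__kmpc_global_dtor_" and, when no CodeGenFunction
// is supplied, an "__omp_threadprivate_init_" function whose body performs
//   __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/NULL, dtor);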
1834 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1835 llvm::GlobalVariable *Addr,
1836 bool PerformInit) {
1837 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1838 !CGM.getLangOpts().OpenMPIsDevice)
1839 return false;
1840 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1841 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1842 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1843 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1844 HasRequiresUnifiedSharedMemory))
1845 return CGM.getLangOpts().OpenMPIsDevice;
1846 VD = VD->getDefinition(CGM.getContext());
1847 assert(VD && "Unknown VarDecl");
1849 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1850 return CGM.getLangOpts().OpenMPIsDevice;
1852 QualType ASTTy = VD->getType();
1853 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1855 // Produce the unique prefix to identify the new target regions. We use
1856 // the source location of the variable declaration, which we know does not
1857 // conflict with any target region.
1858 auto EntryInfo =
1859 getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName());
1860 SmallString<128> Buffer, Out;
1861 OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1863 const Expr *Init = VD->getAnyInitializer();
1864 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1865 llvm::Constant *Ctor;
1866 llvm::Constant *ID;
1867 if (CGM.getLangOpts().OpenMPIsDevice) {
1868 // Generate function that re-emits the declaration's initializer into
1869 // the threadprivate copy of the variable VD
1870 CodeGenFunction CtorCGF(CGM);
1872 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1873 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1874 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1875 FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1876 llvm::GlobalValue::WeakODRLinkage);
1877 Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1878 if (CGM.getTriple().isAMDGCN())
1879 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1880 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1881 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1882 FunctionArgList(), Loc, Loc);
1883 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1884 llvm::Constant *AddrInAS0 = Addr;
1885 if (Addr->getAddressSpace() != 0)
1886 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1887 Addr, llvm::PointerType::getWithSamePointeeType(
1888 cast<llvm::PointerType>(Addr->getType()), 0));
1889 CtorCGF.EmitAnyExprToMem(Init,
1890 Address(AddrInAS0, Addr->getValueType(),
1891 CGM.getContext().getDeclAlign(VD)),
1892 Init->getType().getQualifiers(),
1893 /*IsInitializer=*/true);
1894 CtorCGF.FinishFunction();
1895 Ctor = Fn;
1896 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1897 } else {
1898 Ctor = new llvm::GlobalVariable(
1899 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1900 llvm::GlobalValue::PrivateLinkage,
1901 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1902 ID = Ctor;
1905 // Register the information for the entry associated with the constructor.
1906 Out.clear();
1907 auto CtorEntryInfo = EntryInfo;
1908 CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1909 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1910 CtorEntryInfo, Ctor, ID,
1911 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor,
1912 CGM.getLangOpts().OpenMPIsDevice);
1914 if (VD->getType().isDestructedType() != QualType::DK_none) {
1915 llvm::Constant *Dtor;
1916 llvm::Constant *ID;
1917 if (CGM.getLangOpts().OpenMPIsDevice) {
1918 // Generate function that emits destructor call for the threadprivate
1919 // copy of the variable VD
1920 CodeGenFunction DtorCGF(CGM);
1922 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1923 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1924 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1925 FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1926 llvm::GlobalValue::WeakODRLinkage);
1927 Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1928 if (CGM.getTriple().isAMDGCN())
1929 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1930 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1931 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1932 FunctionArgList(), Loc, Loc);
1933 // Create a scope with an artificial location for the body of this
1934 // function.
1935 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1936 llvm::Constant *AddrInAS0 = Addr;
1937 if (Addr->getAddressSpace() != 0)
1938 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1939 Addr, llvm::PointerType::getWithSamePointeeType(
1940 cast<llvm::PointerType>(Addr->getType()), 0));
1941 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1942 CGM.getContext().getDeclAlign(VD)),
1943 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1944 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1945 DtorCGF.FinishFunction();
1946 Dtor = Fn;
1947 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1948 } else {
1949 Dtor = new llvm::GlobalVariable(
1950 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1951 llvm::GlobalValue::PrivateLinkage,
1952 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1953 ID = Dtor;
1955 // Register the information for the entry associated with the destructor.
1956 Out.clear();
1957 auto DtorEntryInfo = EntryInfo;
1958 DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1959 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1960 DtorEntryInfo, Dtor, ID,
1961 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor,
1962 CGM.getLangOpts().OpenMPIsDevice);
1964 return CGM.getLangOpts().OpenMPIsDevice;
1967 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1968 QualType VarType,
1969 StringRef Name) {
1970 std::string Suffix = getName({"artificial", ""});
1971 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1972 llvm::GlobalVariable *GAddr =
1973 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1974 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1975 CGM.getTarget().isTLSSupported()) {
1976 GAddr->setThreadLocal(/*Val=*/true);
1977 return Address(GAddr, GAddr->getValueType(),
1978 CGM.getContext().getTypeAlignInChars(VarType));
1980 std::string CacheSuffix = getName({"cache", ""});
1981 llvm::Value *Args[] = {
1982 emitUpdateLocation(CGF, SourceLocation()),
1983 getThreadID(CGF, SourceLocation()),
1984 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1985 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1986 /*isSigned=*/false),
1987 getOrCreateInternalVariable(
1988 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
1989 return Address(
1990 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1991 CGF.EmitRuntimeCall(
1992 OMPBuilder.getOrCreateRuntimeFunction(
1993 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1994 Args),
1995 VarLVType->getPointerTo(/*AddrSpace=*/0)),
1996 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1999 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2000 const RegionCodeGenTy &ThenGen,
2001 const RegionCodeGenTy &ElseGen) {
2002 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2004 // If the condition constant folds and can be elided, try to avoid emitting
2005 // the condition and the dead arm of the if/else.
2006 bool CondConstant;
2007 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2008 if (CondConstant)
2009 ThenGen(CGF);
2010 else
2011 ElseGen(CGF);
2012 return;
2015 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2016 // emit the conditional branch.
2017 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2018 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2019 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2020 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2022 // Emit the 'then' code.
2023 CGF.EmitBlock(ThenBlock);
2024 ThenGen(CGF);
2025 CGF.EmitBranch(ContBlock);
2026 // Emit the 'else' code if present.
2027 // There is no need to emit line number for unconditional branch.
2028 (void)ApplyDebugLocation::CreateEmpty(CGF);
2029 CGF.EmitBlock(ElseBlock);
2030 ElseGen(CGF);
2031 // There is no need to emit line number for unconditional branch.
2032 (void)ApplyDebugLocation::CreateEmpty(CGF);
2033 CGF.EmitBranch(ContBlock);
2034 // Emit the continuation block for code after the if.
2035 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
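// Example (sketch): a non-constant condition produces the usual diamond
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// with both arms falling through to omp_if.end, while a condition that
// constant folds emits only the live arm and no control flow at all.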
2038 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2039 llvm::Function *OutlinedFn,
2040 ArrayRef<llvm::Value *> CapturedVars,
2041 const Expr *IfCond,
2042 llvm::Value *NumThreads) {
2043 if (!CGF.HaveInsertPoint())
2044 return;
2045 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2046 auto &M = CGM.getModule();
2047 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2048 this](CodeGenFunction &CGF, PrePostActionTy &) {
2049 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2050 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2051 llvm::Value *Args[] = {
2052 RTLoc,
2053 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2054 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2055 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2056 RealArgs.append(std::begin(Args), std::end(Args));
2057 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2059 llvm::FunctionCallee RTLFn =
2060 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2061 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2063 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2064 this](CodeGenFunction &CGF, PrePostActionTy &) {
2065 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2066 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2067 // Build calls:
2068 // __kmpc_serialized_parallel(&Loc, GTid);
2069 llvm::Value *Args[] = {RTLoc, ThreadID};
2070 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2071 M, OMPRTL___kmpc_serialized_parallel),
2072 Args);
2074 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2075 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2076 Address ZeroAddrBound =
2077 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2078 /*Name=*/".bound.zero.addr");
2079 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2080 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2081 // ThreadId for serialized parallels is 0.
2082 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2083 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2084 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2086 // Ensure we do not inline the function. This is trivially true for the ones
2087 // passed to __kmpc_fork_call but the ones called in serialized regions
2088 // could be inlined. This is not perfect, but it is closer to the invariant
2089 // we want, namely, every data environment starts with a new function.
2090 // TODO: We should pass the if condition to the runtime function and do the
2091 // handling there. Much cleaner code.
2092 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2093 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2094 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2096 // __kmpc_end_serialized_parallel(&Loc, GTid);
2097 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2098 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2099 M, OMPRTL___kmpc_end_serialized_parallel),
2100 EndArgs);
2102 if (IfCond) {
2103 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2104 } else {
2105 RegionCodeGenTy ThenRCG(ThenGen);
2106 ThenRCG(CGF);
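// Example (sketch, not emitted verbatim): for
//   #pragma omp parallel if(cond)
// the generated code is roughly
//   if (cond) {
//     __kmpc_fork_call(&loc, n, (kmpc_micro)outlined, var1, ..., varn);
//   } else {
//     gtid = __kmpc_global_thread_num(&loc);
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &.bound.zero.addr, var1, ..., varn);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }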
2110 // If we're inside an (outlined) parallel region, use the region info's
2111 // thread-ID variable (it is passed as the first argument of the outlined
2112 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2113 // region but in a regular serial code region, get the thread ID by calling
2114 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2115 // temporary and return the address of that temp.
2116 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2117 SourceLocation Loc) {
2118 if (auto *OMPRegionInfo =
2119 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2120 if (OMPRegionInfo->getThreadIDVariable())
2121 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2123 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2124 QualType Int32Ty =
2125 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2126 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2127 CGF.EmitStoreOfScalar(ThreadID,
2128 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2130 return ThreadIDTemp;
2133 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2134 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2135 SmallString<256> Buffer;
2136 llvm::raw_svector_ostream Out(Buffer);
2137 Out << Name;
2138 StringRef RuntimeName = Out.str();
2139 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2140 if (Elem.second) {
2141 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2142 "OMP internal variable has different type than requested");
2143 return &*Elem.second;
2146 return Elem.second = new llvm::GlobalVariable(
2147 CGM.getModule(), Ty, /*IsConstant*/ false,
2148 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2149 Elem.first(), /*InsertBefore=*/nullptr,
2150 llvm::GlobalValue::NotThreadLocal, AddressSpace);
2153 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2154 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2155 std::string Name = getName({Prefix, "var"});
2156 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2159 namespace {
2160 /// Common pre(post)-action for different OpenMP constructs.
2161 class CommonActionTy final : public PrePostActionTy {
2162 llvm::FunctionCallee EnterCallee;
2163 ArrayRef<llvm::Value *> EnterArgs;
2164 llvm::FunctionCallee ExitCallee;
2165 ArrayRef<llvm::Value *> ExitArgs;
2166 bool Conditional;
2167 llvm::BasicBlock *ContBlock = nullptr;
2169 public:
2170 CommonActionTy(llvm::FunctionCallee EnterCallee,
2171 ArrayRef<llvm::Value *> EnterArgs,
2172 llvm::FunctionCallee ExitCallee,
2173 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2174 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2175 ExitArgs(ExitArgs), Conditional(Conditional) {}
2176 void Enter(CodeGenFunction &CGF) override {
2177 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2178 if (Conditional) {
2179 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2180 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2181 ContBlock = CGF.createBasicBlock("omp_if.end");
2182 // Generate the branch (If-stmt)
2183 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2184 CGF.EmitBlock(ThenBlock);
2187 void Done(CodeGenFunction &CGF) {
2188 // Emit the rest of blocks/branches
2189 CGF.EmitBranch(ContBlock);
2190 CGF.EmitBlock(ContBlock, true);
2192 void Exit(CodeGenFunction &CGF) override {
2193 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2196 } // anonymous namespace
2198 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2199 StringRef CriticalName,
2200 const RegionCodeGenTy &CriticalOpGen,
2201 SourceLocation Loc, const Expr *Hint) {
2202 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2203 // CriticalOpGen();
2204 // __kmpc_end_critical(ident_t *, gtid, Lock);
2205 // Prepare arguments and build a call to __kmpc_critical
2206 if (!CGF.HaveInsertPoint())
2207 return;
2208 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2209 getCriticalRegionLock(CriticalName)};
2210 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2211 std::end(Args));
2212 if (Hint) {
2213 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2214 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2216 CommonActionTy Action(
2217 OMPBuilder.getOrCreateRuntimeFunction(
2218 CGM.getModule(),
2219 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2220 EnterArgs,
2221 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2222 OMPRTL___kmpc_end_critical),
2223 Args);
2224 CriticalOpGen.setAction(Action);
2225 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
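// Example (sketch): for
//   #pragma omp critical(foo)
// the emitted sequence is roughly
//   __kmpc_critical(&loc, gtid, &lock);
//   <critical body>
//   __kmpc_end_critical(&loc, gtid, &lock);
// where 'lock' is the module-level kmp_critical_name returned by
// getCriticalRegionLock (its name combines "gomp_critical_user_foo" and
// "var"), and a hint clause switches the entry to __kmpc_critical_with_hint.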
2228 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2229 const RegionCodeGenTy &MasterOpGen,
2230 SourceLocation Loc) {
2231 if (!CGF.HaveInsertPoint())
2232 return;
2233 // if(__kmpc_master(ident_t *, gtid)) {
2234 // MasterOpGen();
2235 // __kmpc_end_master(ident_t *, gtid);
2236 // }
2237 // Prepare arguments and build a call to __kmpc_master
2238 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2239 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2240 CGM.getModule(), OMPRTL___kmpc_master),
2241 Args,
2242 OMPBuilder.getOrCreateRuntimeFunction(
2243 CGM.getModule(), OMPRTL___kmpc_end_master),
2244 Args,
2245 /*Conditional=*/true);
2246 MasterOpGen.setAction(Action);
2247 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2248 Action.Done(CGF);
2251 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2252 const RegionCodeGenTy &MaskedOpGen,
2253 SourceLocation Loc, const Expr *Filter) {
2254 if (!CGF.HaveInsertPoint())
2255 return;
2256 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2257 // MaskedOpGen();
2258 // __kmpc_end_masked(ident_t *, gtid);
2259 // }
2260 // Prepare arguments and build a call to __kmpc_masked
2261 llvm::Value *FilterVal = Filter
2262 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2263 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2264 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2265 FilterVal};
2266 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2267 getThreadID(CGF, Loc)};
2268 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2269 CGM.getModule(), OMPRTL___kmpc_masked),
2270 Args,
2271 OMPBuilder.getOrCreateRuntimeFunction(
2272 CGM.getModule(), OMPRTL___kmpc_end_masked),
2273 ArgsEnd,
2274 /*Conditional=*/true);
2275 MaskedOpGen.setAction(Action);
2276 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2277 Action.Done(CGF);
2280 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2281 SourceLocation Loc) {
2282 if (!CGF.HaveInsertPoint())
2283 return;
2284 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2285 OMPBuilder.createTaskyield(CGF.Builder);
2286 } else {
2287 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2288 llvm::Value *Args[] = {
2289 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2290 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2291 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2292 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2293 Args);
2296 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2297 Region->emitUntiedSwitch(CGF);
2300 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2301 const RegionCodeGenTy &TaskgroupOpGen,
2302 SourceLocation Loc) {
2303 if (!CGF.HaveInsertPoint())
2304 return;
2305 // __kmpc_taskgroup(ident_t *, gtid);
2306 // TaskgroupOpGen();
2307 // __kmpc_end_taskgroup(ident_t *, gtid);
2308 // Prepare arguments and build a call to __kmpc_taskgroup
2309 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2310 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2311 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2312 Args,
2313 OMPBuilder.getOrCreateRuntimeFunction(
2314 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2315 Args);
2316 TaskgroupOpGen.setAction(Action);
2317 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2320 /// Given an array of pointers to variables, project the address of a
2321 /// given variable.
2322 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2323 unsigned Index, const VarDecl *Var) {
2324 // Pull out the pointer to the variable.
2325 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2326 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2328 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2329 return Address(
2330 CGF.Builder.CreateBitCast(
2331 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2332 ElemTy, CGF.getContext().getDeclAlign(Var));
2335 static llvm::Value *emitCopyprivateCopyFunction(
2336 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2337 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2338 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2339 SourceLocation Loc) {
2340 ASTContext &C = CGM.getContext();
2341 // void copy_func(void *LHSArg, void *RHSArg);
2342 FunctionArgList Args;
2343 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2344 ImplicitParamDecl::Other);
2345 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2346 ImplicitParamDecl::Other);
2347 Args.push_back(&LHSArg);
2348 Args.push_back(&RHSArg);
2349 const auto &CGFI =
2350 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2351 std::string Name =
2352 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2353 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2354 llvm::GlobalValue::InternalLinkage, Name,
2355 &CGM.getModule());
2356 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2357 Fn->setDoesNotRecurse();
2358 CodeGenFunction CGF(CGM);
2359 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2360 // Dest = (void*[n])(LHSArg);
2361 // Src = (void*[n])(RHSArg);
2362 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2363 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2364 ArgsElemType->getPointerTo()),
2365 ArgsElemType, CGF.getPointerAlign());
2366 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2367 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2368 ArgsElemType->getPointerTo()),
2369 ArgsElemType, CGF.getPointerAlign());
2370 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2371 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2372 // ...
2373 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2374 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2375 const auto *DestVar =
2376 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2377 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2379 const auto *SrcVar =
2380 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2381 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2383 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2384 QualType Type = VD->getType();
2385 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2387 CGF.FinishFunction();
2388 return Fn;
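// Example (sketch): for two copyprivate variables a and b, the generated
// copy function receives two void*[2] arrays and expands to roughly
//   *(TypeA *)Dst[0] = *(TypeA *)Src[0];
//   *(TypeB *)Dst[1] = *(TypeB *)Src[1];
// using the user-visible assignment operation for each element.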
2391 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2392 const RegionCodeGenTy &SingleOpGen,
2393 SourceLocation Loc,
2394 ArrayRef<const Expr *> CopyprivateVars,
2395 ArrayRef<const Expr *> SrcExprs,
2396 ArrayRef<const Expr *> DstExprs,
2397 ArrayRef<const Expr *> AssignmentOps) {
2398 if (!CGF.HaveInsertPoint())
2399 return;
2400 assert(CopyprivateVars.size() == SrcExprs.size() &&
2401 CopyprivateVars.size() == DstExprs.size() &&
2402 CopyprivateVars.size() == AssignmentOps.size());
2403 ASTContext &C = CGM.getContext();
2404 // int32 did_it = 0;
2405 // if(__kmpc_single(ident_t *, gtid)) {
2406 // SingleOpGen();
2407 // __kmpc_end_single(ident_t *, gtid);
2408 // did_it = 1;
2409 // }
2410 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2411 // <copy_func>, did_it);
2413 Address DidIt = Address::invalid();
2414 if (!CopyprivateVars.empty()) {
2415 // int32 did_it = 0;
2416 QualType KmpInt32Ty =
2417 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2418 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2419 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2421 // Prepare arguments and build a call to __kmpc_single
2422 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2423 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2424 CGM.getModule(), OMPRTL___kmpc_single),
2425 Args,
2426 OMPBuilder.getOrCreateRuntimeFunction(
2427 CGM.getModule(), OMPRTL___kmpc_end_single),
2428 Args,
2429 /*Conditional=*/true);
2430 SingleOpGen.setAction(Action);
2431 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2432 if (DidIt.isValid()) {
2433 // did_it = 1;
2434 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2436 Action.Done(CGF);
2437 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2438 // <copy_func>, did_it);
2439 if (DidIt.isValid()) {
2440 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2441 QualType CopyprivateArrayTy = C.getConstantArrayType(
2442 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2443 /*IndexTypeQuals=*/0);
2444 // Create a list of all private variables for copyprivate.
2445 Address CopyprivateList =
2446 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2447 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2448 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2449 CGF.Builder.CreateStore(
2450 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2451 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2452 CGF.VoidPtrTy),
2453 Elem);
2455 // Build function that copies private values from single region to all other
2456 // threads in the corresponding parallel region.
2457 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2458 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2459 SrcExprs, DstExprs, AssignmentOps, Loc);
2460 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2461 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2462 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2463 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2464 llvm::Value *Args[] = {
2465 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2466 getThreadID(CGF, Loc), // i32 <gtid>
2467 BufSize, // size_t <buf_size>
2468 CL.getPointer(), // void *<copyprivate list>
2469 CpyFn, // void (*) (void *, void *) <copy_func>
2470 DidItVal // i32 did_it
2472 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2473 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2474 Args);
2478 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2479 const RegionCodeGenTy &OrderedOpGen,
2480 SourceLocation Loc, bool IsThreads) {
2481 if (!CGF.HaveInsertPoint())
2482 return;
2483 // __kmpc_ordered(ident_t *, gtid);
2484 // OrderedOpGen();
2485 // __kmpc_end_ordered(ident_t *, gtid);
2486 // Prepare arguments and build a call to __kmpc_ordered
2487 if (IsThreads) {
2488 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2489 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2490 CGM.getModule(), OMPRTL___kmpc_ordered),
2491 Args,
2492 OMPBuilder.getOrCreateRuntimeFunction(
2493 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2494 Args);
2495 OrderedOpGen.setAction(Action);
2496 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2497 return;
2499 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2502 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2503 unsigned Flags;
2504 if (Kind == OMPD_for)
2505 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2506 else if (Kind == OMPD_sections)
2507 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2508 else if (Kind == OMPD_single)
2509 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2510 else if (Kind == OMPD_barrier)
2511 Flags = OMP_IDENT_BARRIER_EXPL;
2512 else
2513 Flags = OMP_IDENT_BARRIER_IMPL;
2514 return Flags;
2517 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2518 CodeGenFunction &CGF, const OMPLoopDirective &S,
2519 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2520 // Check if the loop directive is actually a doacross loop directive. In this
2521 // case, choose the 'static, 1' schedule.
2522 if (llvm::any_of(
2523 S.getClausesOfKind<OMPOrderedClause>(),
2524 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2525 ScheduleKind = OMPC_SCHEDULE_static;
2526 // Chunk size is 1 in this case.
2527 llvm::APInt ChunkSize(32, 1);
2528 ChunkExpr = IntegerLiteral::Create(
2529 CGF.getContext(), ChunkSize,
2530 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2531 SourceLocation());
2535 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2536 OpenMPDirectiveKind Kind, bool EmitChecks,
2537 bool ForceSimpleCall) {
2538 // Check if we should use the OMPBuilder
2539 auto *OMPRegionInfo =
2540 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2541 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2542 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2543 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2544 return;
2547 if (!CGF.HaveInsertPoint())
2548 return;
2549 // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
2550 // __kmpc_barrier(loc, thread_id).
2551 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2554 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2555 getThreadID(CGF, Loc)};
2556 if (OMPRegionInfo) {
2557 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2558 llvm::Value *Result = CGF.EmitRuntimeCall(
2559 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2560 OMPRTL___kmpc_cancel_barrier),
2561 Args);
2562 if (EmitChecks) {
2563 // if (__kmpc_cancel_barrier()) {
2564 // exit from construct;
2565 // }
2566 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2567 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2568 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2569 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2570 CGF.EmitBlock(ExitBB);
2571 // exit from construct;
2572 CodeGenFunction::JumpDest CancelDestination =
2573 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2574 CGF.EmitBranchThroughCleanup(CancelDestination);
2575 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2577 return;
2580 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2581 CGM.getModule(), OMPRTL___kmpc_barrier),
2582 Args);
2585 /// Map the OpenMP loop schedule to the runtime enumeration.
2586 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2587 bool Chunked, bool Ordered) {
2588 switch (ScheduleKind) {
2589 case OMPC_SCHEDULE_static:
2590 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2591 : (Ordered ? OMP_ord_static : OMP_sch_static);
2592 case OMPC_SCHEDULE_dynamic:
2593 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2594 case OMPC_SCHEDULE_guided:
2595 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2596 case OMPC_SCHEDULE_runtime:
2597 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2598 case OMPC_SCHEDULE_auto:
2599 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2600 case OMPC_SCHEDULE_unknown:
2601 assert(!Chunked && "chunk was specified but schedule kind not known");
2602 return Ordered ? OMP_ord_static : OMP_sch_static;
2604 llvm_unreachable("Unexpected runtime schedule");
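// Example (sketch): schedule(dynamic, 4) maps to OMP_sch_dynamic_chunked,
// schedule(static, N) maps to OMP_sch_static_chunked, and schedule(static)
// combined with an ordered clause maps to OMP_ord_static.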
2607 /// Map the OpenMP distribute schedule to the runtime enumeration.
2608 static OpenMPSchedType
2609 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2610 // Only static is allowed for dist_schedule.
2611 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2614 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2615 bool Chunked) const {
2616 OpenMPSchedType Schedule =
2617 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2618 return Schedule == OMP_sch_static;
2621 bool CGOpenMPRuntime::isStaticNonchunked(
2622 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2623 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2624 return Schedule == OMP_dist_sch_static;
2627 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2628 bool Chunked) const {
2629 OpenMPSchedType Schedule =
2630 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2631 return Schedule == OMP_sch_static_chunked;
2634 bool CGOpenMPRuntime::isStaticChunked(
2635 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2636 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2637 return Schedule == OMP_dist_sch_static_chunked;
2640 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2641 OpenMPSchedType Schedule =
2642 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2643 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2644 return Schedule != OMP_sch_static;
2647 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2648 OpenMPScheduleClauseModifier M1,
2649 OpenMPScheduleClauseModifier M2) {
2650 int Modifier = 0;
2651 switch (M1) {
2652 case OMPC_SCHEDULE_MODIFIER_monotonic:
2653 Modifier = OMP_sch_modifier_monotonic;
2654 break;
2655 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2656 Modifier = OMP_sch_modifier_nonmonotonic;
2657 break;
2658 case OMPC_SCHEDULE_MODIFIER_simd:
2659 if (Schedule == OMP_sch_static_chunked)
2660 Schedule = OMP_sch_static_balanced_chunked;
2661 break;
2662 case OMPC_SCHEDULE_MODIFIER_last:
2663 case OMPC_SCHEDULE_MODIFIER_unknown:
2664 break;
2666 switch (M2) {
2667 case OMPC_SCHEDULE_MODIFIER_monotonic:
2668 Modifier = OMP_sch_modifier_monotonic;
2669 break;
2670 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2671 Modifier = OMP_sch_modifier_nonmonotonic;
2672 break;
2673 case OMPC_SCHEDULE_MODIFIER_simd:
2674 if (Schedule == OMP_sch_static_chunked)
2675 Schedule = OMP_sch_static_balanced_chunked;
2676 break;
2677 case OMPC_SCHEDULE_MODIFIER_last:
2678 case OMPC_SCHEDULE_MODIFIER_unknown:
2679 break;
2681 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2682 // If the static schedule kind is specified or if the ordered clause is
2683 // specified, and if the nonmonotonic modifier is not specified, the effect is
2684 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2685 // modifier is specified, the effect is as if the nonmonotonic modifier is
2686 // specified.
2687 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2688 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2689 Schedule == OMP_sch_static_balanced_chunked ||
2690 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2691 Schedule == OMP_dist_sch_static_chunked ||
2692 Schedule == OMP_dist_sch_static))
2693 Modifier = OMP_sch_modifier_nonmonotonic;
2695 return Schedule | Modifier;
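// Example (sketch): under OpenMP 5.0, a plain schedule(dynamic) with no
// modifier yields
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// while the static and ordered schedules are left without the nonmonotonic
// bit, matching the default described above.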
2698 void CGOpenMPRuntime::emitForDispatchInit(
2699 CodeGenFunction &CGF, SourceLocation Loc,
2700 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2701 bool Ordered, const DispatchRTInput &DispatchValues) {
2702 if (!CGF.HaveInsertPoint())
2703 return;
2704 OpenMPSchedType Schedule = getRuntimeSchedule(
2705 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2706 assert(Ordered ||
2707 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2708 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2709 Schedule != OMP_sch_static_balanced_chunked));
2710 // Call __kmpc_dispatch_init(
2711 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2712 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2713 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2715 // If the chunk was not specified in the clause, use the default value 1.
2716 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2717 : CGF.Builder.getIntN(IVSize, 1);
2718 llvm::Value *Args[] = {
2719 emitUpdateLocation(CGF, Loc),
2720 getThreadID(CGF, Loc),
2721 CGF.Builder.getInt32(addMonoNonMonoModifier(
2722 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2723 DispatchValues.LB, // Lower
2724 DispatchValues.UB, // Upper
2725 CGF.Builder.getIntN(IVSize, 1), // Stride
2726 Chunk // Chunk
2728 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2731 static void emitForStaticInitCall(
2732 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2733 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2734 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2735 const CGOpenMPRuntime::StaticRTInput &Values) {
2736 if (!CGF.HaveInsertPoint())
2737 return;
2739 assert(!Values.Ordered);
2740 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2741 Schedule == OMP_sch_static_balanced_chunked ||
2742 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2743 Schedule == OMP_dist_sch_static ||
2744 Schedule == OMP_dist_sch_static_chunked);
2746 // Call __kmpc_for_static_init(
2747 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2748 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2749 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2750 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2751 llvm::Value *Chunk = Values.Chunk;
2752 if (Chunk == nullptr) {
2753 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2754 Schedule == OMP_dist_sch_static) &&
2755 "expected static non-chunked schedule");
2756 // If the chunk was not specified in the clause, use the default value 1.
2757 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2758 } else {
2759 assert((Schedule == OMP_sch_static_chunked ||
2760 Schedule == OMP_sch_static_balanced_chunked ||
2761 Schedule == OMP_ord_static_chunked ||
2762 Schedule == OMP_dist_sch_static_chunked) &&
2763 "expected static chunked schedule");
2765 llvm::Value *Args[] = {
2766 UpdateLocation,
2767 ThreadId,
2768 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2769 M2)), // Schedule type
2770 Values.IL.getPointer(), // &isLastIter
2771 Values.LB.getPointer(), // &LB
2772 Values.UB.getPointer(), // &UB
2773 Values.ST.getPointer(), // &Stride
2774 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2775 Chunk // Chunk
2777 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2780 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2781 SourceLocation Loc,
2782 OpenMPDirectiveKind DKind,
2783 const OpenMPScheduleTy &ScheduleKind,
2784 const StaticRTInput &Values) {
2785 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2786 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2787 assert(isOpenMPWorksharingDirective(DKind) &&
2788 "Expected loop-based or sections-based directive.");
2789 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2790 isOpenMPLoopDirective(DKind)
2791 ? OMP_IDENT_WORK_LOOP
2792 : OMP_IDENT_WORK_SECTIONS);
2793 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2794 llvm::FunctionCallee StaticInitFunction =
2795 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2796 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2797 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2798 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2801 void CGOpenMPRuntime::emitDistributeStaticInit(
2802 CodeGenFunction &CGF, SourceLocation Loc,
2803 OpenMPDistScheduleClauseKind SchedKind,
2804 const CGOpenMPRuntime::StaticRTInput &Values) {
2805 OpenMPSchedType ScheduleNum =
2806 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2807 llvm::Value *UpdatedLocation =
2808 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2809 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2810 llvm::FunctionCallee StaticInitFunction;
2811 bool isGPUDistribute =
2812 CGM.getLangOpts().OpenMPIsDevice &&
2813 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2814 StaticInitFunction = createForStaticInitFunction(
2815 Values.IVSize, Values.IVSigned, isGPUDistribute);
2817 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2818 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2819 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2822 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2823 SourceLocation Loc,
2824 OpenMPDirectiveKind DKind) {
2825 if (!CGF.HaveInsertPoint())
2826 return;
2827 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2828 llvm::Value *Args[] = {
2829 emitUpdateLocation(CGF, Loc,
2830 isOpenMPDistributeDirective(DKind)
2831 ? OMP_IDENT_WORK_DISTRIBUTE
2832 : isOpenMPLoopDirective(DKind)
2833 ? OMP_IDENT_WORK_LOOP
2834 : OMP_IDENT_WORK_SECTIONS),
2835 getThreadID(CGF, Loc)};
2836 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2837 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2838 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2839 CGF.EmitRuntimeCall(
2840 OMPBuilder.getOrCreateRuntimeFunction(
2841 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2842 Args);
2843 else
2844 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2845 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2846 Args);
2849 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2850 SourceLocation Loc,
2851 unsigned IVSize,
2852 bool IVSigned) {
2853 if (!CGF.HaveInsertPoint())
2854 return;
2855 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2856 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2857 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2860 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2861 SourceLocation Loc, unsigned IVSize,
2862 bool IVSigned, Address IL,
2863 Address LB, Address UB,
2864 Address ST) {
2865 // Call __kmpc_dispatch_next(
2866 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2867 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2868 // kmp_int[32|64] *p_stride);
2869 llvm::Value *Args[] = {
2870 emitUpdateLocation(CGF, Loc),
2871 getThreadID(CGF, Loc),
2872 IL.getPointer(), // &isLastIter
2873 LB.getPointer(), // &Lower
2874 UB.getPointer(), // &Upper
2875 ST.getPointer() // &Stride
2876 };
2877 llvm::Value *Call =
2878 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2879 return CGF.EmitScalarConversion(
2880 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2881 CGF.getContext().BoolTy, Loc);
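// Together with the __kmpc_dispatch_init_(4|8)[u] call emitted for the loop,
// this forms the usual dynamic dispatch idiom; a sketch for a signed 32-bit
// IV:
// \code
//   __kmpc_dispatch_init_4(&loc, tid, schedule, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; i += st) ...
//   }
// \endcode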
2884 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2885 llvm::Value *NumThreads,
2886 SourceLocation Loc) {
2887 if (!CGF.HaveInsertPoint())
2888 return;
2889 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2890 llvm::Value *Args[] = {
2891 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2892 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2893 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2894 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2895 Args);
2898 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2899 ProcBindKind ProcBind,
2900 SourceLocation Loc) {
2901 if (!CGF.HaveInsertPoint())
2902 return;
2903 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2904 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2905 llvm::Value *Args[] = {
2906 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2907 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2908 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2909 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2910 Args);
2913 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2914 SourceLocation Loc, llvm::AtomicOrdering AO) {
2915 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2916 OMPBuilder.createFlush(CGF.Builder);
2917 } else {
2918 if (!CGF.HaveInsertPoint())
2919 return;
2920 // Build call void __kmpc_flush(ident_t *loc)
2921 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2922 CGM.getModule(), OMPRTL___kmpc_flush),
2923 emitUpdateLocation(CGF, Loc));
2927 namespace {
2928 /// Indexes of fields for type kmp_task_t.
2929 enum KmpTaskTFields {
2930 /// List of shared variables.
2931 KmpTaskTShareds,
2932 /// Task routine.
2933 KmpTaskTRoutine,
2934 /// Partition id for the untied tasks.
2935 KmpTaskTPartId,
2936 /// Function with call of destructors for private variables.
2937 Data1,
2938 /// Task priority.
2939 Data2,
2940 /// (Taskloops only) Lower bound.
2941 KmpTaskTLowerBound,
2942 /// (Taskloops only) Upper bound.
2943 KmpTaskTUpperBound,
2944 /// (Taskloops only) Stride.
2945 KmpTaskTStride,
2946 /// (Taskloops only) Is last iteration flag.
2947 KmpTaskTLastIter,
2948 /// (Taskloops only) Reduction data.
2949 KmpTaskTReductions,
2950 };
2951 } // anonymous namespace
2953 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2954 // If we are in simd mode or there are no entries, we don't need to do
2955 // anything.
2956 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
2957 return;
2959 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2960 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2961 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2962 SourceLocation Loc;
2963 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2964 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2965 E = CGM.getContext().getSourceManager().fileinfo_end();
2966 I != E; ++I) {
2967 if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
2968 I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
2969 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2970 I->getFirst(), EntryInfo.Line, 1);
2971 break;
2972 }
2973 }
2974 }
2975 switch (Kind) {
2976 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2977 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2978 DiagnosticsEngine::Error, "Offloading entry for target region in "
2979 "%0 is incorrect: either the "
2980 "address or the ID is invalid.");
2981 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2982 } break;
2983 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2984 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2985 DiagnosticsEngine::Error, "Offloading entry for declare target "
2986 "variable %0 is incorrect: the "
2987 "address is invalid.");
2988 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2989 } break;
2990 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2991 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2992 DiagnosticsEngine::Error,
2993 "Offloading entry for declare target variable is incorrect: the "
2994 "address is invalid.");
2995 CGM.getDiags().Report(DiagID);
2996 } break;
2997 }
2998 };
3000 OMPBuilder.createOffloadEntriesAndInfoMetadata(
3001 OffloadEntriesInfoManager, isTargetCodegen(),
3002 CGM.getLangOpts().OpenMPIsDevice,
3003 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory(), ErrorReportFn);
3006 /// Loads all the offload entries information from the host IR
3007 /// metadata.
3008 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3009 // If we are in target mode, load the metadata from the host IR. This code has
3010 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3012 if (!CGM.getLangOpts().OpenMPIsDevice)
3013 return;
3015 if (CGM.getLangOpts().OMPHostIRFile.empty())
3016 return;
3018 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3019 if (auto EC = Buf.getError()) {
3020 CGM.getDiags().Report(diag::err_cannot_open_file)
3021 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3022 return;
3025 llvm::LLVMContext C;
3026 auto ME = expectedToErrorOrAndEmitErrors(
3027 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3029 if (auto EC = ME.getError()) {
3030 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3031 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3032 CGM.getDiags().Report(DiagID)
3033 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3034 return;
3037 OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager);
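// The host IR file consumed here is handed to the device compilation by the
// driver; schematically (cc1 flags, a sketch):
// \code
//   // host cc1:   ... -emit-llvm-bc ... -o host.bc
//   // device cc1: ... -fopenmp-is-device -fopenmp-host-ir-file-path host.bc
// \endcode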
3040 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3041 if (!KmpRoutineEntryPtrTy) {
3042 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3043 ASTContext &C = CGM.getContext();
3044 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3045 FunctionProtoType::ExtProtoInfo EPI;
3046 KmpRoutineEntryPtrQTy = C.getPointerType(
3047 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3048 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3052 namespace {
3053 struct PrivateHelpersTy {
3054 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3055 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3056 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3057 PrivateElemInit(PrivateElemInit) {}
3058 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3059 const Expr *OriginalRef = nullptr;
3060 const VarDecl *Original = nullptr;
3061 const VarDecl *PrivateCopy = nullptr;
3062 const VarDecl *PrivateElemInit = nullptr;
3063 bool isLocalPrivate() const {
3064 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3065 }
3066 };
3067 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3068 } // anonymous namespace
3070 static bool isAllocatableDecl(const VarDecl *VD) {
3071 const VarDecl *CVD = VD->getCanonicalDecl();
3072 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3073 return false;
3074 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3075 // Use the default allocation.
3076 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3077 !AA->getAllocator());
3080 static RecordDecl *
3081 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3082 if (!Privates.empty()) {
3083 ASTContext &C = CGM.getContext();
3084 // Build struct .kmp_privates_t. {
3085 // /* private vars */
3086 // };
3087 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3088 RD->startDefinition();
3089 for (const auto &Pair : Privates) {
3090 const VarDecl *VD = Pair.second.Original;
3091 QualType Type = VD->getType().getNonReferenceType();
3092 // If the private variable is a local variable with lvalue ref type,
3093 // allocate the pointer instead of the pointee type.
3094 if (Pair.second.isLocalPrivate()) {
3095 if (VD->getType()->isLValueReferenceType())
3096 Type = C.getPointerType(Type);
3097 if (isAllocatableDecl(VD))
3098 Type = C.getPointerType(Type);
3100 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3101 if (VD->hasAttrs()) {
3102 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3103 E(VD->getAttrs().end());
3104 I != E; ++I)
3105 FD->addAttr(*I);
3108 RD->completeDefinition();
3109 return RD;
3111 return nullptr;
3114 static RecordDecl *
3115 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3116 QualType KmpInt32Ty,
3117 QualType KmpRoutineEntryPointerQTy) {
3118 ASTContext &C = CGM.getContext();
3119 // Build struct kmp_task_t {
3120 // void * shareds;
3121 // kmp_routine_entry_t routine;
3122 // kmp_int32 part_id;
3123 // kmp_cmplrdata_t data1;
3124 // kmp_cmplrdata_t data2;
3125 // For taskloops additional fields:
3126 // kmp_uint64 lb;
3127 // kmp_uint64 ub;
3128 // kmp_int64 st;
3129 // kmp_int32 liter;
3130 // void * reductions;
3131 // };
3132 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3133 UD->startDefinition();
3134 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3135 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3136 UD->completeDefinition();
3137 QualType KmpCmplrdataTy = C.getRecordType(UD);
3138 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3139 RD->startDefinition();
3140 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3141 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3142 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3143 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3144 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3145 if (isOpenMPTaskLoopDirective(Kind)) {
3146 QualType KmpUInt64Ty =
3147 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3148 QualType KmpInt64Ty =
3149 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3150 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3151 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3152 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3153 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3154 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3156 RD->completeDefinition();
3157 return RD;
3160 static RecordDecl *
3161 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3162 ArrayRef<PrivateDataTy> Privates) {
3163 ASTContext &C = CGM.getContext();
3164 // Build struct kmp_task_t_with_privates {
3165 // kmp_task_t task_data;
3166 // .kmp_privates_t. privates;
3167 // };
3168 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3169 RD->startDefinition();
3170 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3171 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3172 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3173 RD->completeDefinition();
3174 return RD;
3177 /// Emit a proxy function which accepts kmp_task_t as the second
3178 /// argument.
3179 /// \code
3180 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3181 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3182 /// For taskloops:
3183 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3184 /// tt->reductions, tt->shareds);
3185 /// return 0;
3186 /// }
3187 /// \endcode
3188 static llvm::Function *
3189 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3190 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3191 QualType KmpTaskTWithPrivatesPtrQTy,
3192 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3193 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3194 llvm::Value *TaskPrivatesMap) {
3195 ASTContext &C = CGM.getContext();
3196 FunctionArgList Args;
3197 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3198 ImplicitParamDecl::Other);
3199 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3200 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3201 ImplicitParamDecl::Other);
3202 Args.push_back(&GtidArg);
3203 Args.push_back(&TaskTypeArg);
3204 const auto &TaskEntryFnInfo =
3205 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3206 llvm::FunctionType *TaskEntryTy =
3207 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3208 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3209 auto *TaskEntry = llvm::Function::Create(
3210 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3211 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3212 TaskEntry->setDoesNotRecurse();
3213 CodeGenFunction CGF(CGM);
3214 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3215 Loc, Loc);
3217 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3218 // tt,
3219 // For taskloops:
3220 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3221 // tt->task_data.shareds);
3222 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3223 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3224 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3225 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3226 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3227 const auto *KmpTaskTWithPrivatesQTyRD =
3228 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3229 LValue Base =
3230 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3231 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3232 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3233 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3234 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3236 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3237 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3238 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3239 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3240 CGF.ConvertTypeForMem(SharedsPtrTy));
3242 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3243 llvm::Value *PrivatesParam;
3244 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3245 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3246 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3247 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3248 } else {
3249 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3252 llvm::Value *CommonArgs[] = {
3253 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3254 CGF.Builder
3255 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3256 CGF.VoidPtrTy, CGF.Int8Ty)
3257 .getPointer()};
3258 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3259 std::end(CommonArgs));
3260 if (isOpenMPTaskLoopDirective(Kind)) {
3261 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3262 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3263 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3264 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3265 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3266 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3267 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3268 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3269 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3270 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3271 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3272 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3273 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3274 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3275 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3276 CallArgs.push_back(LBParam);
3277 CallArgs.push_back(UBParam);
3278 CallArgs.push_back(StParam);
3279 CallArgs.push_back(LIParam);
3280 CallArgs.push_back(RParam);
3282 CallArgs.push_back(SharedsParam);
3284 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3285 CallArgs);
3286 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3287 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3288 CGF.FinishFunction();
3289 return TaskEntry;
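// Emit the task destructor thunk stored in kmp_cmplrdata_t (see Data1 above);
// schematically (a sketch):
// \code
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     <run the destructor of every destructed field of tt->privates>
//   }
// \endcode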
3292 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3293 SourceLocation Loc,
3294 QualType KmpInt32Ty,
3295 QualType KmpTaskTWithPrivatesPtrQTy,
3296 QualType KmpTaskTWithPrivatesQTy) {
3297 ASTContext &C = CGM.getContext();
3298 FunctionArgList Args;
3299 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3300 ImplicitParamDecl::Other);
3301 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3302 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3303 ImplicitParamDecl::Other);
3304 Args.push_back(&GtidArg);
3305 Args.push_back(&TaskTypeArg);
3306 const auto &DestructorFnInfo =
3307 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3308 llvm::FunctionType *DestructorFnTy =
3309 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3310 std::string Name =
3311 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3312 auto *DestructorFn =
3313 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3314 Name, &CGM.getModule());
3315 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3316 DestructorFnInfo);
3317 DestructorFn->setDoesNotRecurse();
3318 CodeGenFunction CGF(CGM);
3319 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3320 Args, Loc, Loc);
3322 LValue Base = CGF.EmitLoadOfPointerLValue(
3323 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3324 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3325 const auto *KmpTaskTWithPrivatesQTyRD =
3326 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3327 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3328 Base = CGF.EmitLValueForField(Base, *FI);
3329 for (const auto *Field :
3330 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3331 if (QualType::DestructionKind DtorKind =
3332 Field->getType().isDestructedType()) {
3333 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3334 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3337 CGF.FinishFunction();
3338 return DestructorFn;
3341 /// Emit a privates mapping function for correct handling of private and
3342 /// firstprivate variables.
3343 /// \code
3344 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3345 /// **noalias priv1,..., <tyn> **noalias privn) {
3346 /// *priv1 = &.privates.priv1;
3347 /// ...;
3348 /// *privn = &.privates.privn;
3349 /// }
3350 /// \endcode
3351 static llvm::Value *
3352 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3353 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3354 ArrayRef<PrivateDataTy> Privates) {
3355 ASTContext &C = CGM.getContext();
3356 FunctionArgList Args;
3357 ImplicitParamDecl TaskPrivatesArg(
3358 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3359 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3360 ImplicitParamDecl::Other);
3361 Args.push_back(&TaskPrivatesArg);
3362 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3363 unsigned Counter = 1;
3364 for (const Expr *E : Data.PrivateVars) {
3365 Args.push_back(ImplicitParamDecl::Create(
3366 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3367 C.getPointerType(C.getPointerType(E->getType()))
3368 .withConst()
3369 .withRestrict(),
3370 ImplicitParamDecl::Other));
3371 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3372 PrivateVarsPos[VD] = Counter;
3373 ++Counter;
3375 for (const Expr *E : Data.FirstprivateVars) {
3376 Args.push_back(ImplicitParamDecl::Create(
3377 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3378 C.getPointerType(C.getPointerType(E->getType()))
3379 .withConst()
3380 .withRestrict(),
3381 ImplicitParamDecl::Other));
3382 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3383 PrivateVarsPos[VD] = Counter;
3384 ++Counter;
3386 for (const Expr *E : Data.LastprivateVars) {
3387 Args.push_back(ImplicitParamDecl::Create(
3388 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3389 C.getPointerType(C.getPointerType(E->getType()))
3390 .withConst()
3391 .withRestrict(),
3392 ImplicitParamDecl::Other));
3393 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3394 PrivateVarsPos[VD] = Counter;
3395 ++Counter;
3397 for (const VarDecl *VD : Data.PrivateLocals) {
3398 QualType Ty = VD->getType().getNonReferenceType();
3399 if (VD->getType()->isLValueReferenceType())
3400 Ty = C.getPointerType(Ty);
3401 if (isAllocatableDecl(VD))
3402 Ty = C.getPointerType(Ty);
3403 Args.push_back(ImplicitParamDecl::Create(
3404 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3405 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3406 ImplicitParamDecl::Other));
3407 PrivateVarsPos[VD] = Counter;
3408 ++Counter;
3410 const auto &TaskPrivatesMapFnInfo =
3411 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3412 llvm::FunctionType *TaskPrivatesMapTy =
3413 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3414 std::string Name =
3415 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3416 auto *TaskPrivatesMap = llvm::Function::Create(
3417 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3418 &CGM.getModule());
3419 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3420 TaskPrivatesMapFnInfo);
3421 if (CGM.getLangOpts().Optimize) {
3422 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3423 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3424 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3426 CodeGenFunction CGF(CGM);
3427 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3428 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3430 // *privi = &.privates.privi;
3431 LValue Base = CGF.EmitLoadOfPointerLValue(
3432 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3433 TaskPrivatesArg.getType()->castAs<PointerType>());
3434 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3435 Counter = 0;
3436 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3437 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3438 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3439 LValue RefLVal =
3440 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3441 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3442 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3443 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3444 ++Counter;
3446 CGF.FinishFunction();
3447 return TaskPrivatesMap;
3450 /// Emit initialization for private variables in task-based directives.
3451 static void emitPrivatesInit(CodeGenFunction &CGF,
3452 const OMPExecutableDirective &D,
3453 Address KmpTaskSharedsPtr, LValue TDBase,
3454 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3455 QualType SharedsTy, QualType SharedsPtrTy,
3456 const OMPTaskDataTy &Data,
3457 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3458 ASTContext &C = CGF.getContext();
3459 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3460 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3461 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3462 ? OMPD_taskloop
3463 : OMPD_task;
3464 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3465 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3466 LValue SrcBase;
3467 bool IsTargetTask =
3468 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3469 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3470 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3471 // PointersArray, SizesArray, and MappersArray. The original variables for
3472 // these arrays are not captured and we get their addresses explicitly.
3473 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3474 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3475 SrcBase = CGF.MakeAddrLValue(
3476 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3477 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3478 CGF.ConvertTypeForMem(SharedsTy)),
3479 SharedsTy);
3481 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3482 for (const PrivateDataTy &Pair : Privates) {
3483 // Do not initialize private locals.
3484 if (Pair.second.isLocalPrivate()) {
3485 ++FI;
3486 continue;
3488 const VarDecl *VD = Pair.second.PrivateCopy;
3489 const Expr *Init = VD->getAnyInitializer();
3490 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3491 !CGF.isTrivialInitializer(Init)))) {
3492 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3493 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3494 const VarDecl *OriginalVD = Pair.second.Original;
3495 // Check if the variable is the target-based BasePointersArray,
3496 // PointersArray, SizesArray, or MappersArray.
3497 LValue SharedRefLValue;
3498 QualType Type = PrivateLValue.getType();
3499 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3500 if (IsTargetTask && !SharedField) {
3501 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3502 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3503 cast<CapturedDecl>(OriginalVD->getDeclContext())
3504 ->getNumParams() == 0 &&
3505 isa<TranslationUnitDecl>(
3506 cast<CapturedDecl>(OriginalVD->getDeclContext())
3507 ->getDeclContext()) &&
3508 "Expected artificial target data variable.");
3509 SharedRefLValue =
3510 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3511 } else if (ForDup) {
3512 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3513 SharedRefLValue = CGF.MakeAddrLValue(
3514 SharedRefLValue.getAddress(CGF).withAlignment(
3515 C.getDeclAlign(OriginalVD)),
3516 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3517 SharedRefLValue.getTBAAInfo());
3518 } else if (CGF.LambdaCaptureFields.count(
3519 Pair.second.Original->getCanonicalDecl()) > 0 ||
3520 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3521 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3522 } else {
3523 // Processing for implicitly captured variables.
3524 InlinedOpenMPRegionRAII Region(
3525 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3526 /*HasCancel=*/false, /*NoInheritance=*/true);
3527 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3529 if (Type->isArrayType()) {
3530 // Initialize firstprivate array.
3531 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3532 // Perform simple memcpy.
3533 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3534 } else {
3535 // Initialize firstprivate array using element-by-element
3536 // initialization.
3537 CGF.EmitOMPAggregateAssign(
3538 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3539 Type,
3540 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3541 Address SrcElement) {
3542 // Clean up any temporaries needed by the initialization.
3543 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3544 InitScope.addPrivate(Elem, SrcElement);
3545 (void)InitScope.Privatize();
3546 // Emit initialization for single element.
3547 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3548 CGF, &CapturesInfo);
3549 CGF.EmitAnyExprToMem(Init, DestElement,
3550 Init->getType().getQualifiers(),
3551 /*IsInitializer=*/false);
3554 } else {
3555 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3556 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3557 (void)InitScope.Privatize();
3558 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3559 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3560 /*capturedByInit=*/false);
3562 } else {
3563 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3566 ++FI;
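// A sketch of the two array initialization paths above for firstprivates:
// \code
//   int         a[8];  // trivial init: one aggregate copy from the shareds
//   std::string s[4];  // non-trivial: element-by-element copy-construction
// \endcode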
3570 /// Check if a task duplication function is required for taskloops, i.e.,
3570 /// whether any private copy needs non-trivial initialization.
3571 static bool checkInitIsRequired(CodeGenFunction &CGF,
3572 ArrayRef<PrivateDataTy> Privates) {
3573 bool InitRequired = false;
3574 for (const PrivateDataTy &Pair : Privates) {
3575 if (Pair.second.isLocalPrivate())
3576 continue;
3577 const VarDecl *VD = Pair.second.PrivateCopy;
3578 const Expr *Init = VD->getAnyInitializer();
3579 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3580 !CGF.isTrivialInitializer(Init));
3581 if (InitRequired)
3582 break;
3584 return InitRequired;
3588 /// Emit task_dup function (for initialization of
3589 /// private/firstprivate/lastprivate vars and last_iter flag)
3590 /// \code
3591 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3592 /// lastpriv) {
3593 /// // setup lastprivate flag
3594 /// task_dst->last = lastpriv;
3595 /// // could be constructor calls here...
3596 /// }
3597 /// \endcode
3598 static llvm::Value *
3599 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3600 const OMPExecutableDirective &D,
3601 QualType KmpTaskTWithPrivatesPtrQTy,
3602 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3603 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3604 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3605 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3606 ASTContext &C = CGM.getContext();
3607 FunctionArgList Args;
3608 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3609 KmpTaskTWithPrivatesPtrQTy,
3610 ImplicitParamDecl::Other);
3611 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3612 KmpTaskTWithPrivatesPtrQTy,
3613 ImplicitParamDecl::Other);
3614 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3615 ImplicitParamDecl::Other);
3616 Args.push_back(&DstArg);
3617 Args.push_back(&SrcArg);
3618 Args.push_back(&LastprivArg);
3619 const auto &TaskDupFnInfo =
3620 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3621 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3622 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3623 auto *TaskDup = llvm::Function::Create(
3624 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3625 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3626 TaskDup->setDoesNotRecurse();
3627 CodeGenFunction CGF(CGM);
3628 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3629 Loc);
3631 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3632 CGF.GetAddrOfLocalVar(&DstArg),
3633 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3634 // task_dst->liter = lastpriv;
3635 if (WithLastIter) {
3636 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3637 LValue Base = CGF.EmitLValueForField(
3638 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3639 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3640 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3641 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3642 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3645 // Emit initial values for private copies (if any).
3646 assert(!Privates.empty());
3647 Address KmpTaskSharedsPtr = Address::invalid();
3648 if (!Data.FirstprivateVars.empty()) {
3649 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3650 CGF.GetAddrOfLocalVar(&SrcArg),
3651 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3652 LValue Base = CGF.EmitLValueForField(
3653 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3654 KmpTaskSharedsPtr = Address(
3655 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3656 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3657 KmpTaskTShareds)),
3658 Loc),
3659 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3661 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3662 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3663 CGF.FinishFunction();
3664 return TaskDup;
3667 /// Checks if destructor function is required to be generated.
3668 /// \return true if cleanups are required, false otherwise.
3669 static bool
3670 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3671 ArrayRef<PrivateDataTy> Privates) {
3672 for (const PrivateDataTy &P : Privates) {
3673 if (P.second.isLocalPrivate())
3674 continue;
3675 QualType Ty = P.second.Original->getType().getNonReferenceType();
3676 if (Ty.isDestructedType())
3677 return true;
3679 return false;
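// For example (a sketch), the private copy below has a non-trivial
// destructor, so the destructor thunk is emitted and DestructorsFlag is set:
// \code
//   std::string s;
//   #pragma omp task firstprivate(s)
//   use(s);
// \endcode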
3682 namespace {
3683 /// Loop generator for OpenMP iterator expression.
3684 class OMPIteratorGeneratorScope final
3685 : public CodeGenFunction::OMPPrivateScope {
3686 CodeGenFunction &CGF;
3687 const OMPIteratorExpr *E = nullptr;
3688 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3689 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3690 OMPIteratorGeneratorScope() = delete;
3691 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3693 public:
3694 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3695 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3696 if (!E)
3697 return;
3698 SmallVector<llvm::Value *, 4> Uppers;
3699 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3700 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3701 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3702 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3703 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3704 addPrivate(
3705 HelperData.CounterVD,
3706 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3708 Privatize();
3710 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3711 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3712 LValue CLVal =
3713 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3714 HelperData.CounterVD->getType());
3715 // Counter = 0;
3716 CGF.EmitStoreOfScalar(
3717 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3718 CLVal);
3719 CodeGenFunction::JumpDest &ContDest =
3720 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3721 CodeGenFunction::JumpDest &ExitDest =
3722 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3723 // N = <number-of-iterations>;
3724 llvm::Value *N = Uppers[I];
3725 // cont:
3726 // if (Counter < N) goto body; else goto exit;
3727 CGF.EmitBlock(ContDest.getBlock());
3728 auto *CVal =
3729 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3730 llvm::Value *Cmp =
3731 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3732 ? CGF.Builder.CreateICmpSLT(CVal, N)
3733 : CGF.Builder.CreateICmpULT(CVal, N);
3734 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3735 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3736 // body:
3737 CGF.EmitBlock(BodyBB);
3738 // Iteri = Begini + Counter * Stepi;
3739 CGF.EmitIgnoredExpr(HelperData.Update);
3742 ~OMPIteratorGeneratorScope() {
3743 if (!E)
3744 return;
3745 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3746 // Counter = Counter + 1;
3747 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3748 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3749 // goto cont;
3750 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3751 // exit:
3752 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3756 } // namespace
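// The scope above wraps whatever is emitted while it is alive in one loop per
// iterator; for 'iterator(it = begin:end:step)' the net effect is roughly:
// \code
//   for (counter = 0; counter < <number-of-iterations>; ++counter) {
//     it = begin + counter * step;        // HelperData.Update
//     <code emitted inside the scope>
//   }                                     // increment/branch in the dtor
// \endcode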
3758 static std::pair<llvm::Value *, llvm::Value *>
3759 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3760 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3761 llvm::Value *Addr;
3762 if (OASE) {
3763 const Expr *Base = OASE->getBase();
3764 Addr = CGF.EmitScalarExpr(Base);
3765 } else {
3766 Addr = CGF.EmitLValue(E).getPointer(CGF);
3768 llvm::Value *SizeVal;
3769 QualType Ty = E->getType();
3770 if (OASE) {
3771 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3772 for (const Expr *SE : OASE->getDimensions()) {
3773 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3774 Sz = CGF.EmitScalarConversion(
3775 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3776 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3778 } else if (const auto *ASE =
3779 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3780 LValue UpAddrLVal =
3781 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3782 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3783 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3784 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3785 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3786 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3787 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3788 } else {
3789 SizeVal = CGF.getTypeSize(Ty);
3791 return std::make_pair(Addr, SizeVal);
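// Examples of the resulting (pointer, size-in-bytes) pairs (a sketch):
// \code
//   ([n][m])p  ->  { p,     n * m * sizeof(*p) }       // array shaping
//   a[l:k]     ->  { &a[l], (&a[l+k-1] + 1) - &a[l] }  // array section
//   x          ->  { &x,    sizeof(x) }                // plain lvalue
// \endcode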
3794 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3795 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3796 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3797 if (KmpTaskAffinityInfoTy.isNull()) {
3798 RecordDecl *KmpAffinityInfoRD =
3799 C.buildImplicitRecord("kmp_task_affinity_info_t");
3800 KmpAffinityInfoRD->startDefinition();
3801 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3802 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3803 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3804 KmpAffinityInfoRD->completeDefinition();
3805 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
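// The record built above mirrors the runtime's kmp_task_affinity_info_t (a
// sketch; the authoritative layout lives in openmp/runtime/src/kmp.h):
// \code
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t   len;
//     uint32_t flags;
//   };
// \endcode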
3809 CGOpenMPRuntime::TaskResultTy
3810 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3811 const OMPExecutableDirective &D,
3812 llvm::Function *TaskFunction, QualType SharedsTy,
3813 Address Shareds, const OMPTaskDataTy &Data) {
3814 ASTContext &C = CGM.getContext();
3815 llvm::SmallVector<PrivateDataTy, 4> Privates;
3816 // Aggregate privates and sort them by the alignment.
3817 const auto *I = Data.PrivateCopies.begin();
3818 for (const Expr *E : Data.PrivateVars) {
3819 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3820 Privates.emplace_back(
3821 C.getDeclAlign(VD),
3822 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3823 /*PrivateElemInit=*/nullptr));
3824 ++I;
3826 I = Data.FirstprivateCopies.begin();
3827 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3828 for (const Expr *E : Data.FirstprivateVars) {
3829 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3830 Privates.emplace_back(
3831 C.getDeclAlign(VD),
3832 PrivateHelpersTy(
3833 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3834 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3835 ++I;
3836 ++IElemInitRef;
3838 I = Data.LastprivateCopies.begin();
3839 for (const Expr *E : Data.LastprivateVars) {
3840 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3841 Privates.emplace_back(
3842 C.getDeclAlign(VD),
3843 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3844 /*PrivateElemInit=*/nullptr));
3845 ++I;
3847 for (const VarDecl *VD : Data.PrivateLocals) {
3848 if (isAllocatableDecl(VD))
3849 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3850 else
3851 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3853 llvm::stable_sort(Privates,
3854 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3855 return L.first > R.first;
3856 });
3857 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3858 // Build type kmp_routine_entry_t (if not built yet).
3859 emitKmpRoutineEntryT(KmpInt32Ty);
3860 // Build type kmp_task_t (if not built yet).
3861 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3862 if (SavedKmpTaskloopTQTy.isNull()) {
3863 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3864 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3866 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3867 } else {
3868 assert((D.getDirectiveKind() == OMPD_task ||
3869 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3870 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3871 "Expected taskloop, task or target directive");
3872 if (SavedKmpTaskTQTy.isNull()) {
3873 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3874 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3876 KmpTaskTQTy = SavedKmpTaskTQTy;
3878 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3879 // Build particular struct kmp_task_t for the given task.
3880 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3881 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3882 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3883 QualType KmpTaskTWithPrivatesPtrQTy =
3884 C.getPointerType(KmpTaskTWithPrivatesQTy);
3885 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3886 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3887 KmpTaskTWithPrivatesTy->getPointerTo();
3888 llvm::Value *KmpTaskTWithPrivatesTySize =
3889 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3890 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3892 // Emit initial values for private copies (if any).
3893 llvm::Value *TaskPrivatesMap = nullptr;
3894 llvm::Type *TaskPrivatesMapTy =
3895 std::next(TaskFunction->arg_begin(), 3)->getType();
3896 if (!Privates.empty()) {
3897 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3898 TaskPrivatesMap =
3899 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3900 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3901 TaskPrivatesMap, TaskPrivatesMapTy);
3902 } else {
3903 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3904 cast<llvm::PointerType>(TaskPrivatesMapTy));
3906 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3907 // kmp_task_t *tt);
3908 llvm::Function *TaskEntry = emitProxyTaskFunction(
3909 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3910 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3911 TaskPrivatesMap);
3913 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3914 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3915 // kmp_routine_entry_t *task_entry);
3916 // Task flags. The format follows the kmp_tasking_flags struct described in
3917 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h.
3919 enum {
3920 TiedFlag = 0x1,
3921 FinalFlag = 0x2,
3922 DestructorsFlag = 0x8,
3923 PriorityFlag = 0x20,
3924 DetachableFlag = 0x40,
3925 };
3926 unsigned Flags = Data.Tied ? TiedFlag : 0;
3927 bool NeedsCleanup = false;
3928 if (!Privates.empty()) {
3929 NeedsCleanup =
3930 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3931 if (NeedsCleanup)
3932 Flags = Flags | DestructorsFlag;
3934 if (Data.Priority.getInt())
3935 Flags = Flags | PriorityFlag;
3936 if (D.hasClausesOfKind<OMPDetachClause>())
3937 Flags = Flags | DetachableFlag;
3938 llvm::Value *TaskFlags =
3939 Data.Final.getPointer()
3940 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3941 CGF.Builder.getInt32(FinalFlag),
3942 CGF.Builder.getInt32(/*C=*/0))
3943 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3944 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
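// e.g. (a sketch) a tied task with 'final(cond)' and 'priority(p)' ends up
// with:
// \code
//   flags = (cond ? 0x2 /*Final*/ : 0) | 0x1 /*Tied*/ | 0x20 /*Priority*/;
// \endcode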
3945 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3946 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3947 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3948 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3949 TaskEntry, KmpRoutineEntryPtrTy)};
3950 llvm::Value *NewTask;
3951 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3952 // Check if we have any device clause associated with the directive.
3953 const Expr *Device = nullptr;
3954 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3955 Device = C->getDevice();
3956 // Emit the device ID if present; otherwise use the default value.
3957 llvm::Value *DeviceID;
3958 if (Device)
3959 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3960 CGF.Int64Ty, /*isSigned=*/true);
3961 else
3962 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3963 AllocArgs.push_back(DeviceID);
3964 NewTask = CGF.EmitRuntimeCall(
3965 OMPBuilder.getOrCreateRuntimeFunction(
3966 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3967 AllocArgs);
3968 } else {
3969 NewTask =
3970 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3971 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3972 AllocArgs);
3974 // Emit detach clause initialization.
3975 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3976 // task_descriptor);
3977 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3978 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3979 LValue EvtLVal = CGF.EmitLValue(Evt);
3981 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3982 // int gtid, kmp_task_t *task);
3983 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3984 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3985 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3986 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3987 OMPBuilder.getOrCreateRuntimeFunction(
3988 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3989 {Loc, Tid, NewTask});
3990 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3991 Evt->getExprLoc());
3992 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3994 // Process affinity clauses.
3995 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3996 // Process list of affinity data.
3997 ASTContext &C = CGM.getContext();
3998 Address AffinitiesArray = Address::invalid();
3999 // Calculate number of elements to form the array of affinity data.
4000 llvm::Value *NumOfElements = nullptr;
4001 unsigned NumAffinities = 0;
4002 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4003 if (const Expr *Modifier = C->getModifier()) {
4004 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4005 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4006 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4007 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4008 NumOfElements =
4009 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4011 } else {
4012 NumAffinities += C->varlist_size();
4015 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4016 // Fields ids in kmp_task_affinity_info record.
4017 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4019 QualType KmpTaskAffinityInfoArrayTy;
4020 if (NumOfElements) {
4021 NumOfElements = CGF.Builder.CreateNUWAdd(
4022 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4023 auto *OVE = new (C) OpaqueValueExpr(
4024 Loc,
4025 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4026 VK_PRValue);
4027 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4028 RValue::get(NumOfElements));
4029 KmpTaskAffinityInfoArrayTy =
4030 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4031 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4032 // Properly emit variable-sized array.
4033 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4034 ImplicitParamDecl::Other);
4035 CGF.EmitVarDecl(*PD);
4036 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4037 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4038 /*isSigned=*/false);
4039 } else {
4040 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4041 KmpTaskAffinityInfoTy,
4042 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4043 ArrayType::Normal, /*IndexTypeQuals=*/0);
4044 AffinitiesArray =
4045 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4046 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4047 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4048 /*isSigned=*/false);
4051 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4052 // Fill the array with items from clauses that have no iterator modifier.
4053 unsigned Pos = 0;
4054 bool HasIterator = false;
4055 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4056 if (C->getModifier()) {
4057 HasIterator = true;
4058 continue;
4060 for (const Expr *E : C->varlists()) {
4061 llvm::Value *Addr;
4062 llvm::Value *Size;
4063 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4064 LValue Base =
4065 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4066 KmpTaskAffinityInfoTy);
4067 // affs[i].base_addr = &<Affinities[i].second>;
4068 LValue BaseAddrLVal = CGF.EmitLValueForField(
4069 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4070 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4071 BaseAddrLVal);
4072 // affs[i].len = sizeof(<Affinities[i].second>);
4073 LValue LenLVal = CGF.EmitLValueForField(
4074 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4075 CGF.EmitStoreOfScalar(Size, LenLVal);
4076 ++Pos;
4079 LValue PosLVal;
4080 if (HasIterator) {
4081 PosLVal = CGF.MakeAddrLValue(
4082 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4083 C.getSizeType());
4084 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4086 // Process elements with iterators.
4087 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4088 const Expr *Modifier = C->getModifier();
4089 if (!Modifier)
4090 continue;
4091 OMPIteratorGeneratorScope IteratorScope(
4092 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4093 for (const Expr *E : C->varlists()) {
4094 llvm::Value *Addr;
4095 llvm::Value *Size;
4096 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4097 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4098 LValue Base = CGF.MakeAddrLValue(
4099 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4100 // affs[i].base_addr = &<Affinities[i].second>;
4101 LValue BaseAddrLVal = CGF.EmitLValueForField(
4102 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4103 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4104 BaseAddrLVal);
4105 // affs[i].len = sizeof(<Affinities[i].second>);
4106 LValue LenLVal = CGF.EmitLValueForField(
4107 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4108 CGF.EmitStoreOfScalar(Size, LenLVal);
4109 Idx = CGF.Builder.CreateNUWAdd(
4110 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4111 CGF.EmitStoreOfScalar(Idx, PosLVal);
4114 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4115 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4116 // naffins, kmp_task_affinity_info_t *affin_list);
4117 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4118 llvm::Value *GTid = getThreadID(CGF, Loc);
4119 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4120 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4121 // FIXME: Emit the function and ignore its result for now unless the
4122 // runtime function is properly implemented.
4123 (void)CGF.EmitRuntimeCall(
4124 OMPBuilder.getOrCreateRuntimeFunction(
4125 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4126 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4128 llvm::Value *NewTaskNewTaskTTy =
4129 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4130 NewTask, KmpTaskTWithPrivatesPtrTy);
4131 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4132 KmpTaskTWithPrivatesQTy);
4133 LValue TDBase =
4134 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4135 // Fill the data in the resulting kmp_task_t record.
4136 // Copy shareds if there are any.
4137 Address KmpTaskSharedsPtr = Address::invalid();
4138 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4139 KmpTaskSharedsPtr = Address(
4140 CGF.EmitLoadOfScalar(
4141 CGF.EmitLValueForField(
4142 TDBase,
4143 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4144 Loc),
4145 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4146 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4147 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4148 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4150 // Emit initial values for private copies (if any).
4151 TaskResultTy Result;
4152 if (!Privates.empty()) {
4153 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4154 SharedsTy, SharedsPtrTy, Data, Privates,
4155 /*ForDup=*/false);
4156 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4157 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4158 Result.TaskDupFn = emitTaskDupFunction(
4159 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4160 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4161 /*WithLastIter=*/!Data.LastprivateVars.empty());
4164 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4165 enum { Priority = 0, Destructors = 1 };
4166 // Provide pointer to function with destructors for privates.
4167 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4168 const RecordDecl *KmpCmplrdataUD =
4169 (*FI)->getType()->getAsUnionType()->getDecl();
4170 if (NeedsCleanup) {
4171 llvm::Value *DestructorFn = emitDestructorsFunction(
4172 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4173 KmpTaskTWithPrivatesQTy);
4174 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4175 LValue DestructorsLV = CGF.EmitLValueForField(
4176 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4177 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4178 DestructorFn, KmpRoutineEntryPtrTy),
4179 DestructorsLV);
4181 // Set priority.
4182 if (Data.Priority.getInt()) {
4183 LValue Data2LV = CGF.EmitLValueForField(
4184 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4185 LValue PriorityLV = CGF.EmitLValueForField(
4186 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4187 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4189 Result.NewTask = NewTask;
4190 Result.TaskEntry = TaskEntry;
4191 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4192 Result.TDBase = TDBase;
4193 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4194 return Result;
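// Schematically, the task setup built here amounts to (a sketch; flags,
// sizes and the eventual enqueueing vary with the directive):
// \code
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, tid, flags,
//                       sizeof(kmp_task_t_with_privates), sizeof(shareds),
//                       &.omp_task_entry.);
//   memcpy(t->shareds, <captured shareds>, sizeof(shareds));
//   <initialize privates in place>
//   // The caller later emits __kmpc_omp_task(&loc, tid, t) or the
//   // taskloop/target variant.
// \endcode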
4197 /// Translates internal dependency kind into the runtime kind.
4198 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4199 RTLDependenceKindTy DepKind;
4200 switch (K) {
4201 case OMPC_DEPEND_in:
4202 DepKind = RTLDependenceKindTy::DepIn;
4203 break;
4204 // Out and InOut dependencies must use the same code.
4205 case OMPC_DEPEND_out:
4206 case OMPC_DEPEND_inout:
4207 DepKind = RTLDependenceKindTy::DepInOut;
4208 break;
4209 case OMPC_DEPEND_mutexinoutset:
4210 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4211 break;
4212 case OMPC_DEPEND_inoutset:
4213 DepKind = RTLDependenceKindTy::DepInOutSet;
4214 break;
4215 case OMPC_DEPEND_outallmemory:
4216 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4217 break;
4218 case OMPC_DEPEND_source:
4219 case OMPC_DEPEND_sink:
4220 case OMPC_DEPEND_depobj:
4221 case OMPC_DEPEND_inoutallmemory:
4222 case OMPC_DEPEND_unknown:
4223 llvm_unreachable("Unknown task dependence type");
4225 return DepKind;
4228 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4229 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4230 QualType &FlagsTy) {
4231 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4232 if (KmpDependInfoTy.isNull()) {
4233 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4234 KmpDependInfoRD->startDefinition();
4235 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4236 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4237 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4238 KmpDependInfoRD->completeDefinition();
4239 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4240 }
4241 }
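// For reference, the record built by getDependTypes corresponds roughly to the
// runtime-side layout below (a sketch; the exact field names in the OpenMP
// runtime's kmp.h may differ):
//   struct kmp_depend_info {
//     intptr_t base_addr; // start address of the dependency item
//     size_t len;         // length of the item in bytes
//     <flags type> flags; // unsigned integer of the same width as bool
//   };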
4243 std::pair<llvm::Value *, LValue>
4244 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4245 SourceLocation Loc) {
4246 ASTContext &C = CGM.getContext();
4247 QualType FlagsTy;
4248 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4249 RecordDecl *KmpDependInfoRD =
4250 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4251 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4252 LValue Base = CGF.EmitLoadOfPointerLValue(
4253 CGF.Builder.CreateElementBitCast(
4254 DepobjLVal.getAddress(CGF),
4255 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4256 KmpDependInfoPtrTy->castAs<PointerType>());
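// The element count of a depobj is stored in the slot immediately before the
// first kmp_depend_info record (emitDepobjDependClause below reserves that
// extra leading element), so step back one element to read it.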
4257 Address DepObjAddr = CGF.Builder.CreateGEP(
4258 Base.getAddress(CGF),
4259 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4260 LValue NumDepsBase = CGF.MakeAddrLValue(
4261 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4262 // NumDeps = deps[-1].base_addr;
4263 LValue BaseAddrLVal = CGF.EmitLValueForField(
4264 NumDepsBase,
4265 *std::next(KmpDependInfoRD->field_begin(),
4266 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4267 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4268 return std::make_pair(NumDeps, Base);
4271 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4272 llvm::PointerUnion<unsigned *, LValue *> Pos,
4273 const OMPTaskDataTy::DependData &Data,
4274 Address DependenciesArray) {
4275 CodeGenModule &CGM = CGF.CGM;
4276 ASTContext &C = CGM.getContext();
4277 QualType FlagsTy;
4278 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4279 RecordDecl *KmpDependInfoRD =
4280 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4281 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4283 OMPIteratorGeneratorScope IteratorScope(
4284 CGF, cast_or_null<OMPIteratorExpr>(
4285 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4286 : nullptr));
4287 for (const Expr *E : Data.DepExprs) {
4288 llvm::Value *Addr;
4289 llvm::Value *Size;
4291 // The expression will be a nullptr in the 'omp_all_memory' case.
4292 if (E) {
4293 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4294 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4295 } else {
4296 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4297 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4299 LValue Base;
4300 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4301 Base = CGF.MakeAddrLValue(
4302 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4303 } else {
4304 assert(E && "Expected a non-null expression");
4305 LValue &PosLVal = *Pos.get<LValue *>();
4306 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4307 Base = CGF.MakeAddrLValue(
4308 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4310 // deps[i].base_addr = &<Dependencies[i].second>;
4311 LValue BaseAddrLVal = CGF.EmitLValueForField(
4312 Base,
4313 *std::next(KmpDependInfoRD->field_begin(),
4314 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4315 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4316 // deps[i].len = sizeof(<Dependencies[i].second>);
4317 LValue LenLVal = CGF.EmitLValueForField(
4318 Base, *std::next(KmpDependInfoRD->field_begin(),
4319 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4320 CGF.EmitStoreOfScalar(Size, LenLVal);
4321 // deps[i].flags = <Dependencies[i].first>;
4322 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4323 LValue FlagsLVal = CGF.EmitLValueForField(
4324 Base,
4325 *std::next(KmpDependInfoRD->field_begin(),
4326 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4327 CGF.EmitStoreOfScalar(
4328 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4329 FlagsLVal);
4330 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4331 ++(*P);
4332 } else {
4333 LValue &PosLVal = *Pos.get<LValue *>();
4334 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4335 Idx = CGF.Builder.CreateNUWAdd(Idx,
4336 llvm::ConstantInt::get(Idx->getType(), 1));
4337 CGF.EmitStoreOfScalar(Idx, PosLVal);
4338 }
4339 }
4340 }
4342 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4343 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4344 const OMPTaskDataTy::DependData &Data) {
4345 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4346 "Expected depobj dependency kind.");
4347 SmallVector<llvm::Value *, 4> Sizes;
4348 SmallVector<LValue, 4> SizeLVals;
4349 ASTContext &C = CGF.getContext();
4351 OMPIteratorGeneratorScope IteratorScope(
4352 CGF, cast_or_null<OMPIteratorExpr>(
4353 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4354 : nullptr));
4355 for (const Expr *E : Data.DepExprs) {
4356 llvm::Value *NumDeps;
4357 LValue Base;
4358 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4359 std::tie(NumDeps, Base) =
4360 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4361 LValue NumLVal = CGF.MakeAddrLValue(
4362 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4363 C.getUIntPtrType());
4364 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4365 NumLVal.getAddress(CGF));
4366 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4367 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4368 CGF.EmitStoreOfScalar(Add, NumLVal);
4369 SizeLVals.push_back(NumLVal);
4372 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4373 llvm::Value *Size =
4374 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4375 Sizes.push_back(Size);
4376 }
4377 return Sizes;
4378 }
4380 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4381 QualType &KmpDependInfoTy,
4382 LValue PosLVal,
4383 const OMPTaskDataTy::DependData &Data,
4384 Address DependenciesArray) {
4385 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4386 "Expected depobj dependency kind.");
4387 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4389 OMPIteratorGeneratorScope IteratorScope(
4390 CGF, cast_or_null<OMPIteratorExpr>(
4391 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4392 : nullptr));
4393 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4394 const Expr *E = Data.DepExprs[I];
4395 llvm::Value *NumDeps;
4396 LValue Base;
4397 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4398 std::tie(NumDeps, Base) =
4399 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4401 // memcpy the dependency data.
4402 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4403 ElSize,
4404 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4405 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4406 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4407 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4409 // Increase pos.
4410 // pos += numDeps;
4411 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4412 CGF.EmitStoreOfScalar(Add, PosLVal);
4413 }
4414 }
4417 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4418 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4419 SourceLocation Loc) {
4420 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4421 return D.DepExprs.empty();
4422 }))
4423 return std::make_pair(nullptr, Address::invalid());
4424 // Process list of dependencies.
4425 ASTContext &C = CGM.getContext();
4426 Address DependenciesArray = Address::invalid();
4427 llvm::Value *NumOfElements = nullptr;
4428 unsigned NumDependencies = std::accumulate(
4429 Dependencies.begin(), Dependencies.end(), 0,
4430 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4431 return D.DepKind == OMPC_DEPEND_depobj
4433 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4435 QualType FlagsTy;
4436 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4437 bool HasDepobjDeps = false;
4438 bool HasRegularWithIterators = false;
4439 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4440 llvm::Value *NumOfRegularWithIterators =
4441 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4442 // Calculate the number of depobj dependencies and regular deps with
4443 // iterators.
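// A depobj argument contributes the element counts stored in its depobj
// arrays; for a dependency under an iterator, each iterator's upper bound
// times the number of list items is added to the running count.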
4444 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4445 if (D.DepKind == OMPC_DEPEND_depobj) {
4446 SmallVector<llvm::Value *, 4> Sizes =
4447 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4448 for (llvm::Value *Size : Sizes) {
4449 NumOfDepobjElements =
4450 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4452 HasDepobjDeps = true;
4453 continue;
4455 // Include number of iterations, if any.
4457 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4458 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4459 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4460 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4461 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4462 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4463 NumOfRegularWithIterators =
4464 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4466 HasRegularWithIterators = true;
4467 continue;
4471 QualType KmpDependInfoArrayTy;
4472 if (HasDepobjDeps || HasRegularWithIterators) {
4473 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4474 /*isSigned=*/false);
4475 if (HasDepobjDeps) {
4476 NumOfElements =
4477 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4479 if (HasRegularWithIterators) {
4480 NumOfElements =
4481 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4483 auto *OVE = new (C) OpaqueValueExpr(
4484 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4485 VK_PRValue);
4486 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4487 RValue::get(NumOfElements));
4488 KmpDependInfoArrayTy =
4489 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4490 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4491 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4492 // Properly emit variable-sized array.
4493 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4494 ImplicitParamDecl::Other);
4495 CGF.EmitVarDecl(*PD);
4496 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4497 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4498 /*isSigned=*/false);
4499 } else {
4500 KmpDependInfoArrayTy = C.getConstantArrayType(
4501 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4502 ArrayType::Normal, /*IndexTypeQuals=*/0);
4503 DependenciesArray =
4504 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4505 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4506 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4507 /*isSigned=*/false);
4509 unsigned Pos = 0;
4510 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4511 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4512 Dependencies[I].IteratorExpr)
4513 continue;
4514 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4515 DependenciesArray);
4517 // Copy regular dependencies with iterators.
4518 LValue PosLVal = CGF.MakeAddrLValue(
4519 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4520 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4521 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4522 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4523 !Dependencies[I].IteratorExpr)
4524 continue;
4525 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4526 DependenciesArray);
4528 // Copy final depobj arrays without iterators.
4529 if (HasDepobjDeps) {
4530 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4531 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4532 continue;
4533 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4534 DependenciesArray);
4537 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4538 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4539 return std::make_pair(NumOfElements, DependenciesArray);
4542 Address CGOpenMPRuntime::emitDepobjDependClause(
4543 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4544 SourceLocation Loc) {
4545 if (Dependencies.DepExprs.empty())
4546 return Address::invalid();
4547 // Process list of dependencies.
4548 ASTContext &C = CGM.getContext();
4549 Address DependenciesArray = Address::invalid();
4550 unsigned NumDependencies = Dependencies.DepExprs.size();
4551 QualType FlagsTy;
4552 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4553 RecordDecl *KmpDependInfoRD =
4554 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4556 llvm::Value *Size;
4557 // Define type kmp_depend_info[<Dependencies.size()>];
4558 // For depobj, reserve one extra element to store the number of elements.
4559 // This is required to handle the depobj(x) update(in) construct.
4560 // kmp_depend_info[<Dependencies.size()>] deps;
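// Resulting layout (a sketch): deps[0].base_addr holds the number of
// dependencies, deps[1..N] hold the actual records, and the address returned
// to the caller points at deps[1].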
4561 llvm::Value *NumDepsVal;
4562 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4563 if (const auto *IE =
4564 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4565 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4566 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4567 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4568 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4569 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4571 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4572 NumDepsVal);
4573 CharUnits SizeInBytes =
4574 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4575 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4576 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4577 NumDepsVal =
4578 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4579 } else {
4580 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4581 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4582 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4583 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4584 Size = CGM.getSize(Sz.alignTo(Align));
4585 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4587 // Need to allocate in dynamic memory.
4588 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4589 // Use default allocator.
4590 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4591 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4593 llvm::Value *Addr =
4594 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4595 CGM.getModule(), OMPRTL___kmpc_alloc),
4596 Args, ".dep.arr.addr");
4597 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4598 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4599 Addr, KmpDependInfoLlvmTy->getPointerTo());
4600 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4601 // Write number of elements in the first element of array for depobj.
4602 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4603 // deps[0].base_addr = <number of dependencies>;
4604 LValue BaseAddrLVal = CGF.EmitLValueForField(
4605 Base,
4606 *std::next(KmpDependInfoRD->field_begin(),
4607 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4608 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4609 llvm::PointerUnion<unsigned *, LValue *> Pos;
4610 unsigned Idx = 1;
4611 LValue PosLVal;
4612 if (Dependencies.IteratorExpr) {
4613 PosLVal = CGF.MakeAddrLValue(
4614 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4615 C.getSizeType());
4616 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4617 /*IsInit=*/true);
4618 Pos = &PosLVal;
4619 } else {
4620 Pos = &Idx;
4622 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4623 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4624 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4625 CGF.Int8Ty);
4626 return DependenciesArray;
4629 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4630 SourceLocation Loc) {
4631 ASTContext &C = CGM.getContext();
4632 QualType FlagsTy;
4633 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4634 LValue Base = CGF.EmitLoadOfPointerLValue(
4635 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4636 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4637 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4638 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4639 CGF.ConvertTypeForMem(KmpDependInfoTy));
4640 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4641 Addr.getElementType(), Addr.getPointer(),
4642 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4643 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4644 CGF.VoidPtrTy);
4645 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4646 // Use default allocator.
4647 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4648 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4650 // __kmpc_free(gtid, addr, nullptr);
4651 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4652 CGM.getModule(), OMPRTL___kmpc_free),
4653 Args);
4656 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4657 OpenMPDependClauseKind NewDepKind,
4658 SourceLocation Loc) {
4659 ASTContext &C = CGM.getContext();
4660 QualType FlagsTy;
4661 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4662 RecordDecl *KmpDependInfoRD =
4663 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4664 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4665 llvm::Value *NumDeps;
4666 LValue Base;
4667 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4669 Address Begin = Base.getAddress(CGF);
4670 // Compute the pointer past the last element: End = Begin + NumDeps.
4671 llvm::Value *End = CGF.Builder.CreateGEP(
4672 Begin.getElementType(), Begin.getPointer(), NumDeps);
4673 // The basic structure here is a while-do loop.
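// Roughly: el = begin; do { el->flags = <new kind>; ++el; } while (el != end);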
4674 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4675 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4676 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4677 CGF.EmitBlock(BodyBB);
4678 llvm::PHINode *ElementPHI =
4679 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4680 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4681 Begin = Begin.withPointer(ElementPHI);
4682 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4683 Base.getTBAAInfo());
4684 // deps[i].flags = NewDepKind;
4685 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4686 LValue FlagsLVal = CGF.EmitLValueForField(
4687 Base, *std::next(KmpDependInfoRD->field_begin(),
4688 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4689 CGF.EmitStoreOfScalar(
4690 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4691 FlagsLVal);
4693 // Shift the address forward by one element.
4694 Address ElementNext =
4695 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4696 ElementPHI->addIncoming(ElementNext.getPointer(),
4697 CGF.Builder.GetInsertBlock());
4698 llvm::Value *IsEmpty =
4699 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4700 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4701 // Done.
4702 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4705 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4706 const OMPExecutableDirective &D,
4707 llvm::Function *TaskFunction,
4708 QualType SharedsTy, Address Shareds,
4709 const Expr *IfCond,
4710 const OMPTaskDataTy &Data) {
4711 if (!CGF.HaveInsertPoint())
4712 return;
4714 TaskResultTy Result =
4715 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4716 llvm::Value *NewTask = Result.NewTask;
4717 llvm::Function *TaskEntry = Result.TaskEntry;
4718 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4719 LValue TDBase = Result.TDBase;
4720 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4721 // Process list of dependences.
4722 Address DependenciesArray = Address::invalid();
4723 llvm::Value *NumOfElements;
4724 std::tie(NumOfElements, DependenciesArray) =
4725 emitDependClause(CGF, Data.Dependences, Loc);
4727 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4728 // libcall.
4729 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4730 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4731 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if the
4732 // dependence list is not empty.
4733 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4734 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4735 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4736 llvm::Value *DepTaskArgs[7];
4737 if (!Data.Dependences.empty()) {
4738 DepTaskArgs[0] = UpLoc;
4739 DepTaskArgs[1] = ThreadID;
4740 DepTaskArgs[2] = NewTask;
4741 DepTaskArgs[3] = NumOfElements;
4742 DepTaskArgs[4] = DependenciesArray.getPointer();
4743 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4744 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4746 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4747 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4748 if (!Data.Tied) {
4749 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4750 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4751 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4753 if (!Data.Dependences.empty()) {
4754 CGF.EmitRuntimeCall(
4755 OMPBuilder.getOrCreateRuntimeFunction(
4756 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4757 DepTaskArgs);
4758 } else {
4759 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4760 CGM.getModule(), OMPRTL___kmpc_omp_task),
4761 TaskArgs);
4763 // Check if the parent region is untied and build the return for an untied task.
4764 if (auto *Region =
4765 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4766 Region->emitUntiedSwitch(CGF);
4769 llvm::Value *DepWaitTaskArgs[6];
4770 if (!Data.Dependences.empty()) {
4771 DepWaitTaskArgs[0] = UpLoc;
4772 DepWaitTaskArgs[1] = ThreadID;
4773 DepWaitTaskArgs[2] = NumOfElements;
4774 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4775 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4776 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4778 auto &M = CGM.getModule();
4779 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4780 TaskEntry, &Data, &DepWaitTaskArgs,
4781 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4782 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4783 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4784 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4785 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if dependence info
4786 // is specified.
4787 if (!Data.Dependences.empty())
4788 CGF.EmitRuntimeCall(
4789 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
4790 DepWaitTaskArgs);
4791 // Call proxy_task_entry(gtid, new_task);
4792 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4793 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4794 Action.Enter(CGF);
4795 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4796 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4797 OutlinedFnArgs);
4800 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4801 // kmp_task_t *new_task);
4802 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4803 // kmp_task_t *new_task);
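// That is, when the 'if' clause evaluates to false the task body runs inline:
// wait for the dependences, then invoke the proxy task entry bracketed by the
// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0 calls emitted via
// the CommonActionTy below.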
4804 RegionCodeGenTy RCG(CodeGen);
4805 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4806 M, OMPRTL___kmpc_omp_task_begin_if0),
4807 TaskArgs,
4808 OMPBuilder.getOrCreateRuntimeFunction(
4809 M, OMPRTL___kmpc_omp_task_complete_if0),
4810 TaskArgs);
4811 RCG.setAction(Action);
4812 RCG(CGF);
4815 if (IfCond) {
4816 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4817 } else {
4818 RegionCodeGenTy ThenRCG(ThenCodeGen);
4819 ThenRCG(CGF);
4823 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4824 const OMPLoopDirective &D,
4825 llvm::Function *TaskFunction,
4826 QualType SharedsTy, Address Shareds,
4827 const Expr *IfCond,
4828 const OMPTaskDataTy &Data) {
4829 if (!CGF.HaveInsertPoint())
4830 return;
4831 TaskResultTy Result =
4832 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4833 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4834 // libcall.
4835 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4836 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4837 // sched, kmp_uint64 grainsize, void *task_dup);
4838 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4839 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4840 llvm::Value *IfVal;
4841 if (IfCond) {
4842 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4843 /*isSigned=*/true);
4844 } else {
4845 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4848 LValue LBLVal = CGF.EmitLValueForField(
4849 Result.TDBase,
4850 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4851 const auto *LBVar =
4852 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4853 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4854 LBLVal.getQuals(),
4855 /*IsInitializer=*/true);
4856 LValue UBLVal = CGF.EmitLValueForField(
4857 Result.TDBase,
4858 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4859 const auto *UBVar =
4860 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4861 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4862 UBLVal.getQuals(),
4863 /*IsInitializer=*/true);
4864 LValue StLVal = CGF.EmitLValueForField(
4865 Result.TDBase,
4866 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4867 const auto *StVar =
4868 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4869 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4870 StLVal.getQuals(),
4871 /*IsInitializer=*/true);
4872 // Store reductions address.
4873 LValue RedLVal = CGF.EmitLValueForField(
4874 Result.TDBase,
4875 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4876 if (Data.Reductions) {
4877 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4878 } else {
4879 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4880 CGF.getContext().VoidPtrTy);
4882 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
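// The 'sched' argument below encodes the schedule clause: 0 for none, 1 for
// grainsize, 2 for num_tasks; Data.Schedule.getInt() distinguishes num_tasks
// from grainsize when a schedule value is present.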
4883 llvm::Value *TaskArgs[] = {
4884 UpLoc,
4885 ThreadID,
4886 Result.NewTask,
4887 IfVal,
4888 LBLVal.getPointer(CGF),
4889 UBLVal.getPointer(CGF),
4890 CGF.EmitLoadOfScalar(StLVal, Loc),
4891 llvm::ConstantInt::getSigned(
4892 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4893 llvm::ConstantInt::getSigned(
4894 CGF.IntTy, Data.Schedule.getPointer()
4895 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4896 : NoSchedule),
4897 Data.Schedule.getPointer()
4898 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4899 /*isSigned=*/false)
4900 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4901 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4902 Result.TaskDupFn, CGF.VoidPtrTy)
4903 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4904 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4905 CGM.getModule(), OMPRTL___kmpc_taskloop),
4906 TaskArgs);
4909 /// Emit reduction operation for each element of array (required for
4910 /// array sections) LHS op = RHS.
4911 /// \param Type Type of array.
4912 /// \param LHSVar Variable on the left side of the reduction operation
4913 /// (references element of array in original variable).
4914 /// \param RHSVar Variable on the right side of the reduction operation
4915 /// (references element of array in original variable).
4916 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4917 /// RHSVar.
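/// For example, for 'reduction(+ : a[0:n])' this emits a loop over the section
/// elements that applies 'lhs[i] += rhs[i]' (via RedOpGen) to each pair.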
4918 static void EmitOMPAggregateReduction(
4919 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4920 const VarDecl *RHSVar,
4921 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4922 const Expr *, const Expr *)> &RedOpGen,
4923 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4924 const Expr *UpExpr = nullptr) {
4925 // Perform the element-by-element reduction.
4926 QualType ElementTy;
4927 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4928 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4930 // Drill down to the base element type on both arrays.
4931 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4932 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4934 llvm::Value *RHSBegin = RHSAddr.getPointer();
4935 llvm::Value *LHSBegin = LHSAddr.getPointer();
4936 // Cast from pointer to array type to pointer to single element.
4937 llvm::Value *LHSEnd =
4938 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4939 // The basic structure here is a while-do loop.
4940 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4941 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4942 llvm::Value *IsEmpty =
4943 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4944 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4946 // Enter the loop body, making that address the current address.
4947 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4948 CGF.EmitBlock(BodyBB);
4950 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4952 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4953 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4954 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4955 Address RHSElementCurrent(
4956 RHSElementPHI, RHSAddr.getElementType(),
4957 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4959 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4960 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4961 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4962 Address LHSElementCurrent(
4963 LHSElementPHI, LHSAddr.getElementType(),
4964 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4966 // Emit copy.
4967 CodeGenFunction::OMPPrivateScope Scope(CGF);
4968 Scope.addPrivate(LHSVar, LHSElementCurrent);
4969 Scope.addPrivate(RHSVar, RHSElementCurrent);
4970 Scope.Privatize();
4971 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4972 Scope.ForceCleanup();
4974 // Shift the address forward by one element.
4975 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4976 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4977 "omp.arraycpy.dest.element");
4978 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4979 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4980 "omp.arraycpy.src.element");
4981 // Check whether we've reached the end.
4982 llvm::Value *Done =
4983 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4984 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4985 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4986 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4988 // Done.
4989 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4992 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4993 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4994 /// UDR combiner function.
4995 static void emitReductionCombiner(CodeGenFunction &CGF,
4996 const Expr *ReductionOp) {
4997 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4998 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4999 if (const auto *DRE =
5000 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5001 if (const auto *DRD =
5002 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5003 std::pair<llvm::Function *, llvm::Function *> Reduction =
5004 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5005 RValue Func = RValue::get(Reduction.first);
5006 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5007 CGF.EmitIgnoredExpr(ReductionOp);
5008 return;
5010 CGF.EmitIgnoredExpr(ReductionOp);
5013 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5014 SourceLocation Loc, llvm::Type *ArgsElemType,
5015 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5016 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5017 ASTContext &C = CGM.getContext();
5019 // void reduction_func(void *LHSArg, void *RHSArg);
5020 FunctionArgList Args;
5021 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5022 ImplicitParamDecl::Other);
5023 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5024 ImplicitParamDecl::Other);
5025 Args.push_back(&LHSArg);
5026 Args.push_back(&RHSArg);
5027 const auto &CGFI =
5028 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5029 std::string Name = getName({"omp", "reduction", "reduction_func"});
5030 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5031 llvm::GlobalValue::InternalLinkage, Name,
5032 &CGM.getModule());
5033 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5034 Fn->setDoesNotRecurse();
5035 CodeGenFunction CGF(CGM);
5036 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5038 // Dst = (void*[n])(LHSArg);
5039 // Src = (void*[n])(RHSArg);
5040 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5041 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5042 ArgsElemType->getPointerTo()),
5043 ArgsElemType, CGF.getPointerAlign());
5044 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5045 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5046 ArgsElemType->getPointerTo()),
5047 ArgsElemType, CGF.getPointerAlign());
5049 // ...
5050 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5051 // ...
5052 CodeGenFunction::OMPPrivateScope Scope(CGF);
5053 const auto *IPriv = Privates.begin();
5054 unsigned Idx = 0;
5055 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5056 const auto *RHSVar =
5057 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5058 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5059 const auto *LHSVar =
5060 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5061 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5062 QualType PrivTy = (*IPriv)->getType();
5063 if (PrivTy->isVariablyModifiedType()) {
5064 // Get array size and emit VLA type.
5065 ++Idx;
5066 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5067 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5068 const VariableArrayType *VLA =
5069 CGF.getContext().getAsVariableArrayType(PrivTy);
5070 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5071 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5072 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5073 CGF.EmitVariablyModifiedType(PrivTy);
5076 Scope.Privatize();
5077 IPriv = Privates.begin();
5078 const auto *ILHS = LHSExprs.begin();
5079 const auto *IRHS = RHSExprs.begin();
5080 for (const Expr *E : ReductionOps) {
5081 if ((*IPriv)->getType()->isArrayType()) {
5082 // Emit reduction for array section.
5083 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5084 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5085 EmitOMPAggregateReduction(
5086 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5087 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5088 emitReductionCombiner(CGF, E);
5090 } else {
5091 // Emit reduction for array subscript or single variable.
5092 emitReductionCombiner(CGF, E);
5094 ++IPriv;
5095 ++ILHS;
5096 ++IRHS;
5098 Scope.ForceCleanup();
5099 CGF.FinishFunction();
5100 return Fn;
5103 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5104 const Expr *ReductionOp,
5105 const Expr *PrivateRef,
5106 const DeclRefExpr *LHS,
5107 const DeclRefExpr *RHS) {
5108 if (PrivateRef->getType()->isArrayType()) {
5109 // Emit reduction for array section.
5110 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5111 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5112 EmitOMPAggregateReduction(
5113 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5114 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5115 emitReductionCombiner(CGF, ReductionOp);
5117 } else {
5118 // Emit reduction for array subscript or single variable.
5119 emitReductionCombiner(CGF, ReductionOp);
5123 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5124 ArrayRef<const Expr *> Privates,
5125 ArrayRef<const Expr *> LHSExprs,
5126 ArrayRef<const Expr *> RHSExprs,
5127 ArrayRef<const Expr *> ReductionOps,
5128 ReductionOptionsTy Options) {
5129 if (!CGF.HaveInsertPoint())
5130 return;
5132 bool WithNowait = Options.WithNowait;
5133 bool SimpleReduction = Options.SimpleReduction;
5135 // The following code should be emitted for reduction:
5137 // static kmp_critical_name lock = { 0 };
5139 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5140 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5141 // ...
5142 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5143 // *(Type<n>-1*)rhs[<n>-1]);
5144 // }
5146 // ...
5147 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5148 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5149 // RedList, reduce_func, &<lock>)) {
5150 // case 1:
5151 // ...
5152 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5153 // ...
5154 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5155 // break;
5156 // case 2:
5157 // ...
5158 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5159 // ...
5160 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5161 // break;
5162 // default:;
5163 // }
5165 // If SimpleReduction is true, only the following code is generated:
5166 // ...
5167 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5168 // ...
5170 ASTContext &C = CGM.getContext();
5172 if (SimpleReduction) {
5173 CodeGenFunction::RunCleanupsScope Scope(CGF);
5174 const auto *IPriv = Privates.begin();
5175 const auto *ILHS = LHSExprs.begin();
5176 const auto *IRHS = RHSExprs.begin();
5177 for (const Expr *E : ReductionOps) {
5178 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5179 cast<DeclRefExpr>(*IRHS));
5180 ++IPriv;
5181 ++ILHS;
5182 ++IRHS;
5184 return;
5187 // 1. Build a list of reduction variables.
5188 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5189 auto Size = RHSExprs.size();
5190 for (const Expr *E : Privates) {
5191 if (E->getType()->isVariablyModifiedType())
5192 // Reserve a slot for the array size.
5193 ++Size;
5195 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5196 QualType ReductionArrayTy =
5197 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5198 /*IndexTypeQuals=*/0);
5199 Address ReductionList =
5200 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5201 const auto *IPriv = Privates.begin();
5202 unsigned Idx = 0;
5203 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5204 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5205 CGF.Builder.CreateStore(
5206 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5207 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5208 Elem);
5209 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5210 // Store array size.
5211 ++Idx;
5212 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5213 llvm::Value *Size = CGF.Builder.CreateIntCast(
5214 CGF.getVLASize(
5215 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5216 .NumElts,
5217 CGF.SizeTy, /*isSigned=*/false);
5218 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5219 Elem);
5223 // 2. Emit reduce_func().
5224 llvm::Function *ReductionFn =
5225 emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5226 Privates, LHSExprs, RHSExprs, ReductionOps);
5228 // 3. Create static kmp_critical_name lock = { 0 };
5229 std::string Name = getName({"reduction"});
5230 llvm::Value *Lock = getCriticalRegionLock(Name);
5232 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5233 // RedList, reduce_func, &<lock>);
5234 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5235 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5236 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5237 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5238 ReductionList.getPointer(), CGF.VoidPtrTy);
5239 llvm::Value *Args[] = {
5240 IdentTLoc, // ident_t *<loc>
5241 ThreadId, // i32 <gtid>
5242 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5243 ReductionArrayTySize, // size_type sizeof(RedList)
5244 RL, // void *RedList
5245 ReductionFn, // void (*) (void *, void *) <reduce_func>
5246 Lock // kmp_critical_name *&<lock>
5248 llvm::Value *Res = CGF.EmitRuntimeCall(
5249 OMPBuilder.getOrCreateRuntimeFunction(
5250 CGM.getModule(),
5251 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5252 Args);
5254 // 5. Build switch(res)
5255 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5256 llvm::SwitchInst *SwInst =
5257 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5259 // 6. Build case 1:
5260 // ...
5261 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5262 // ...
5263 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5264 // break;
5265 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5266 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5267 CGF.EmitBlock(Case1BB);
5269 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5270 llvm::Value *EndArgs[] = {
5271 IdentTLoc, // ident_t *<loc>
5272 ThreadId, // i32 <gtid>
5273 Lock // kmp_critical_name *&<lock>
5275 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5276 CodeGenFunction &CGF, PrePostActionTy &Action) {
5277 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5278 const auto *IPriv = Privates.begin();
5279 const auto *ILHS = LHSExprs.begin();
5280 const auto *IRHS = RHSExprs.begin();
5281 for (const Expr *E : ReductionOps) {
5282 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5283 cast<DeclRefExpr>(*IRHS));
5284 ++IPriv;
5285 ++ILHS;
5286 ++IRHS;
5289 RegionCodeGenTy RCG(CodeGen);
5290 CommonActionTy Action(
5291 nullptr, llvm::None,
5292 OMPBuilder.getOrCreateRuntimeFunction(
5293 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5294 : OMPRTL___kmpc_end_reduce),
5295 EndArgs);
5296 RCG.setAction(Action);
5297 RCG(CGF);
5299 CGF.EmitBranch(DefaultBB);
5301 // 7. Build case 2:
5302 // ...
5303 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5304 // ...
5305 // break;
5306 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5307 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5308 CGF.EmitBlock(Case2BB);
5310 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5311 CodeGenFunction &CGF, PrePostActionTy &Action) {
5312 const auto *ILHS = LHSExprs.begin();
5313 const auto *IRHS = RHSExprs.begin();
5314 const auto *IPriv = Privates.begin();
5315 for (const Expr *E : ReductionOps) {
5316 const Expr *XExpr = nullptr;
5317 const Expr *EExpr = nullptr;
5318 const Expr *UpExpr = nullptr;
5319 BinaryOperatorKind BO = BO_Comma;
5320 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5321 if (BO->getOpcode() == BO_Assign) {
5322 XExpr = BO->getLHS();
5323 UpExpr = BO->getRHS();
5326 // Try to emit update expression as a simple atomic.
5327 const Expr *RHSExpr = UpExpr;
5328 if (RHSExpr) {
5329 // Analyze RHS part of the whole expression.
5330 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5331 RHSExpr->IgnoreParenImpCasts())) {
5332 // If this is a conditional operator, analyze its condition for
5333 // min/max reduction operator.
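// For example, a min combiner has the form 'x = x < e ? x : e'; analyzing the
// condition 'x < e' recovers the comparison opcode and the 'e' operand so the
// update can be emitted as a single atomic operation.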
5334 RHSExpr = ACO->getCond();
5336 if (const auto *BORHS =
5337 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5338 EExpr = BORHS->getRHS();
5339 BO = BORHS->getOpcode();
5342 if (XExpr) {
5343 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5344 auto &&AtomicRedGen = [BO, VD,
5345 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5346 const Expr *EExpr, const Expr *UpExpr) {
5347 LValue X = CGF.EmitLValue(XExpr);
5348 RValue E;
5349 if (EExpr)
5350 E = CGF.EmitAnyExpr(EExpr);
5351 CGF.EmitOMPAtomicSimpleUpdateExpr(
5352 X, E, BO, /*IsXLHSInRHSPart=*/true,
5353 llvm::AtomicOrdering::Monotonic, Loc,
5354 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5355 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5356 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5357 CGF.emitOMPSimpleStore(
5358 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5359 VD->getType().getNonReferenceType(), Loc);
5360 PrivateScope.addPrivate(VD, LHSTemp);
5361 (void)PrivateScope.Privatize();
5362 return CGF.EmitAnyExpr(UpExpr);
5365 if ((*IPriv)->getType()->isArrayType()) {
5366 // Emit atomic reduction for array section.
5367 const auto *RHSVar =
5368 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5369 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5370 AtomicRedGen, XExpr, EExpr, UpExpr);
5371 } else {
5372 // Emit atomic reduction for array subscript or single variable.
5373 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5375 } else {
5376 // Emit as a critical region.
5377 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5378 const Expr *, const Expr *) {
5379 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5380 std::string Name = RT.getName({"atomic_reduction"});
5381 RT.emitCriticalRegion(
5382 CGF, Name,
5383 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5384 Action.Enter(CGF);
5385 emitReductionCombiner(CGF, E);
5387 Loc);
5389 if ((*IPriv)->getType()->isArrayType()) {
5390 const auto *LHSVar =
5391 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5392 const auto *RHSVar =
5393 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5394 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5395 CritRedGen);
5396 } else {
5397 CritRedGen(CGF, nullptr, nullptr, nullptr);
5400 ++ILHS;
5401 ++IRHS;
5402 ++IPriv;
5405 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5406 if (!WithNowait) {
5407 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5408 llvm::Value *EndArgs[] = {
5409 IdentTLoc, // ident_t *<loc>
5410 ThreadId, // i32 <gtid>
5411 Lock // kmp_critical_name *&<lock>
5413 CommonActionTy Action(nullptr, llvm::None,
5414 OMPBuilder.getOrCreateRuntimeFunction(
5415 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5416 EndArgs);
5417 AtomicRCG.setAction(Action);
5418 AtomicRCG(CGF);
5419 } else {
5420 AtomicRCG(CGF);
5423 CGF.EmitBranch(DefaultBB);
5424 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5427 /// Generates unique name for artificial threadprivate variables.
5428 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5429 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5430 const Expr *Ref) {
5431 SmallString<256> Buffer;
5432 llvm::raw_svector_ostream Out(Buffer);
5433 const clang::DeclRefExpr *DE;
5434 const VarDecl *D = ::getBaseDecl(Ref, DE);
5435 if (!D)
5436 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5437 D = D->getCanonicalDecl();
5438 std::string Name = CGM.getOpenMPRuntime().getName(
5439 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5440 Out << Prefix << Name << "_"
5441 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5442 return std::string(Out.str());
5445 /// Emits reduction initializer function:
5446 /// \code
5447 /// void @.red_init(void* %arg, void* %orig) {
5448 /// %0 = bitcast void* %arg to <type>*
5449 /// store <type> <init>, <type>* %0
5450 /// ret void
5451 /// }
5452 /// \endcode
5453 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5454 SourceLocation Loc,
5455 ReductionCodeGen &RCG, unsigned N) {
5456 ASTContext &C = CGM.getContext();
5457 QualType VoidPtrTy = C.VoidPtrTy;
5458 VoidPtrTy.addRestrict();
5459 FunctionArgList Args;
5460 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5461 ImplicitParamDecl::Other);
5462 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5463 ImplicitParamDecl::Other);
5464 Args.emplace_back(&Param);
5465 Args.emplace_back(&ParamOrig);
5466 const auto &FnInfo =
5467 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5468 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5469 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5470 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5471 Name, &CGM.getModule());
5472 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5473 Fn->setDoesNotRecurse();
5474 CodeGenFunction CGF(CGM);
5475 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5476 QualType PrivateType = RCG.getPrivateType(N);
5477 Address PrivateAddr = CGF.EmitLoadOfPointer(
5478 CGF.Builder.CreateElementBitCast(
5479 CGF.GetAddrOfLocalVar(&Param),
5480 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5481 C.getPointerType(PrivateType)->castAs<PointerType>());
5482 llvm::Value *Size = nullptr;
5483 // If the size of the reduction item is non-constant, load it from the global
5484 // threadprivate variable.
5485 if (RCG.getSizes(N).second) {
5486 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5487 CGF, CGM.getContext().getSizeType(),
5488 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5489 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5490 CGM.getContext().getSizeType(), Loc);
5492 RCG.emitAggregateType(CGF, N, Size);
5493 Address OrigAddr = Address::invalid();
5494 // If the initializer uses the initializer from the declare reduction
5495 // construct, emit a pointer to the address of the original reduction item
5496 // (required by the reduction initializer).
5497 if (RCG.usesReductionInitializer(N)) {
5498 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5499 OrigAddr = CGF.EmitLoadOfPointer(
5500 SharedAddr,
5501 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5503 // Emit the initializer:
5504 // %0 = bitcast void* %arg to <type>*
5505 // store <type> <init>, <type>* %0
5506 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5507 [](CodeGenFunction &) { return false; });
5508 CGF.FinishFunction();
5509 return Fn;
5512 /// Emits reduction combiner function:
5513 /// \code
5514 /// void @.red_comb(void* %arg0, void* %arg1) {
5515 /// %lhs = bitcast void* %arg0 to <type>*
5516 /// %rhs = bitcast void* %arg1 to <type>*
5517 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5518 /// store <type> %2, <type>* %lhs
5519 /// ret void
5520 /// }
5521 /// \endcode
5522 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5523 SourceLocation Loc,
5524 ReductionCodeGen &RCG, unsigned N,
5525 const Expr *ReductionOp,
5526 const Expr *LHS, const Expr *RHS,
5527 const Expr *PrivateRef) {
5528 ASTContext &C = CGM.getContext();
5529 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5530 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5531 FunctionArgList Args;
5532 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5533 C.VoidPtrTy, ImplicitParamDecl::Other);
5534 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5535 ImplicitParamDecl::Other);
5536 Args.emplace_back(&ParamInOut);
5537 Args.emplace_back(&ParamIn);
5538 const auto &FnInfo =
5539 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5540 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5541 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5542 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5543 Name, &CGM.getModule());
5544 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5545 Fn->setDoesNotRecurse();
5546 CodeGenFunction CGF(CGM);
5547 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5548 llvm::Value *Size = nullptr;
5549 // If the size of the reduction item is non-constant, load it from the global
5550 // threadprivate variable.
5551 if (RCG.getSizes(N).second) {
5552 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5553 CGF, CGM.getContext().getSizeType(),
5554 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5555 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5556 CGM.getContext().getSizeType(), Loc);
5558 RCG.emitAggregateType(CGF, N, Size);
5559 // Remap lhs and rhs variables to the addresses of the function arguments.
5560 // %lhs = bitcast void* %arg0 to <type>*
5561 // %rhs = bitcast void* %arg1 to <type>*
5562 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5563 PrivateScope.addPrivate(
5564 LHSVD,
5565 // Pull out the pointer to the variable.
5566 CGF.EmitLoadOfPointer(
5567 CGF.Builder.CreateElementBitCast(
5568 CGF.GetAddrOfLocalVar(&ParamInOut),
5569 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5570 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5571 PrivateScope.addPrivate(
5572 RHSVD,
5573 // Pull out the pointer to the variable.
5574 CGF.EmitLoadOfPointer(
5575 CGF.Builder.CreateElementBitCast(
5576 CGF.GetAddrOfLocalVar(&ParamIn),
5577 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5578 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5579 PrivateScope.Privatize();
5580 // Emit the combiner body:
5581 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5582 // store <type> %2, <type>* %lhs
5583 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5584 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5585 cast<DeclRefExpr>(RHS));
5586 CGF.FinishFunction();
5587 return Fn;
5590 /// Emits reduction finalizer function:
5591 /// \code
5592 /// void @.red_fini(void* %arg) {
5593 /// %0 = bitcast void* %arg to <type>*
5594 /// <destroy>(<type>* %0)
5595 /// ret void
5596 /// }
5597 /// \endcode
5598 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5599 SourceLocation Loc,
5600 ReductionCodeGen &RCG, unsigned N) {
5601 if (!RCG.needCleanups(N))
5602 return nullptr;
5603 ASTContext &C = CGM.getContext();
5604 FunctionArgList Args;
5605 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5606 ImplicitParamDecl::Other);
5607 Args.emplace_back(&Param);
5608 const auto &FnInfo =
5609 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5610 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5611 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5612 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5613 Name, &CGM.getModule());
5614 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5615 Fn->setDoesNotRecurse();
5616 CodeGenFunction CGF(CGM);
5617 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5618 Address PrivateAddr = CGF.EmitLoadOfPointer(
5619 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5620 llvm::Value *Size = nullptr;
5621 // If the size of the reduction item is non-constant, load it from the global
5622 // threadprivate variable.
5623 if (RCG.getSizes(N).second) {
5624 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5625 CGF, CGM.getContext().getSizeType(),
5626 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5627 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5628 CGM.getContext().getSizeType(), Loc);
5630 RCG.emitAggregateType(CGF, N, Size);
5631 // Emit the finalizer body:
5632 // <destroy>(<type>* %0)
5633 RCG.emitCleanups(CGF, N, PrivateAddr);
5634 CGF.FinishFunction(Loc);
5635 return Fn;
5636 }
5638 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5639 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5640 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5641 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5642 return nullptr;
5644 // Build typedef struct:
5645 // kmp_taskred_input {
5646 // void *reduce_shar; // shared reduction item
5647 // void *reduce_orig; // original reduction item used for initialization
5648 // size_t reduce_size; // size of data item
5649 // void *reduce_init; // data initialization routine
5650 // void *reduce_fini; // data finalization routine
5651 // void *reduce_comb; // data combiner routine
5652 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5653 // } kmp_taskred_input_t;
5654 ASTContext &C = CGM.getContext();
5655 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5656 RD->startDefinition();
5657 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5658 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5659 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5660 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5661 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5662 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5663 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5664 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5665 RD->completeDefinition();
5666 QualType RDType = C.getRecordType(RD);
5667 unsigned Size = Data.ReductionVars.size();
5668 llvm::APInt ArraySize(/*numBits=*/64, Size);
5669 QualType ArrayRDType = C.getConstantArrayType(
5670 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5671 // kmp_task_red_input_t .rd_input.[Size];
5672 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5673 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5674 Data.ReductionCopies, Data.ReductionOps);
5675 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5676 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5677 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5678 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5679 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5680 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5681 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5682 ".rd_input.gep.");
5683 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5684 // ElemLVal.reduce_shar = &Shareds[Cnt];
5685 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5686 RCG.emitSharedOrigLValue(CGF, Cnt);
5687 llvm::Value *CastedShared =
5688 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5689 CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5690 // ElemLVal.reduce_orig = &Origs[Cnt];
5691 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5692 llvm::Value *CastedOrig =
5693 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
5694 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
5695 RCG.emitAggregateType(CGF, Cnt);
5696 llvm::Value *SizeValInChars;
5697 llvm::Value *SizeVal;
5698 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5699 // We use delayed creation/initialization for VLAs and array sections. It is
5700 // required because the runtime does not provide a way to pass the sizes of
5701 // VLAs/array sections to the initializer/combiner/finalizer functions.
5702 // Instead, threadprivate global variables are used to store these values
5703 // and make them available to those functions.
5704 bool DelayedCreation = !!SizeVal;
5705 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5706 /*isSigned=*/false);
5707 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5708 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5709 // ElemLVal.reduce_init = init;
5710 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5711 llvm::Value *InitAddr =
5712 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5713 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5714 // ElemLVal.reduce_fini = fini;
5715 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5716 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5717 llvm::Value *FiniAddr = Fini
5718 ? CGF.EmitCastToVoidPtr(Fini)
5719 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5720 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5721 // ElemLVal.reduce_comb = comb;
5722 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5723 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
5724 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5725 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5726 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5727 // ElemLVal.flags = 0;
5728 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5729 if (DelayedCreation) {
5730 CGF.EmitStoreOfScalar(
5731 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5732 FlagsLVal);
5733 } else
5734 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5735 FlagsLVal.getType());
5736 }
5737 if (Data.IsReductionWithTaskMod) {
5738 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5739 // is_ws, int num, void *data);
5740 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5741 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5742 CGM.IntTy, /*isSigned=*/true);
5743 llvm::Value *Args[] = {
5744 IdentTLoc, GTid,
5745 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5746 /*isSigned=*/true),
5747 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5748 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5749 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5750 return CGF.EmitRuntimeCall(
5751 OMPBuilder.getOrCreateRuntimeFunction(
5752 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5753 Args);
5754 }
5755 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5756 llvm::Value *Args[] = {
5757 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5758 /*isSigned=*/true),
5759 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5760 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5761 CGM.VoidPtrTy)};
5762 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5763 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5764 Args);
5765 }
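// For reference, a construct such as
// \code
// #pragma omp taskgroup task_reduction(+ : x)
// \endcode
// reaches this point with one entry in Data.ReductionVars, so the loop above
// fills a single kmp_taskred_input_t record and the value returned by
// __kmpc_taskred_init acts as the taskgroup's reduction descriptor.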
5767 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5768 SourceLocation Loc,
5769 bool IsWorksharingReduction) {
5770 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
5771 // gtid, int is_ws);
5772 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5773 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5774 CGM.IntTy, /*isSigned=*/true);
5775 llvm::Value *Args[] = {IdentTLoc, GTid,
5776 llvm::ConstantInt::get(CGM.IntTy,
5777 IsWorksharingReduction ? 1 : 0,
5778 /*isSigned=*/true)};
5779 (void)CGF.EmitRuntimeCall(
5780 OMPBuilder.getOrCreateRuntimeFunction(
5781 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5782 Args);
5783 }
5785 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5786 SourceLocation Loc,
5787 ReductionCodeGen &RCG,
5788 unsigned N) {
5789 auto Sizes = RCG.getSizes(N);
5790 // Emit the threadprivate global variable if the size is non-constant
5791 // (Sizes.second != nullptr).
5792 if (Sizes.second) {
5793 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5794 /*isSigned=*/false);
5795 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5796 CGF, CGM.getContext().getSizeType(),
5797 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5798 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5799 }
5800 }
5802 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5803 SourceLocation Loc,
5804 llvm::Value *ReductionsPtr,
5805 LValue SharedLVal) {
5806 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5807 // *d);
5808 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5809 CGM.IntTy,
5810 /*isSigned=*/true),
5811 ReductionsPtr,
5812 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5813 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5814 return Address(
5815 CGF.EmitRuntimeCall(
5816 OMPBuilder.getOrCreateRuntimeFunction(
5817 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5818 Args),
5819 CGF.Int8Ty, SharedLVal.getAlignment());
5820 }
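// The runtime call above returns the address of the calling thread's private
// copy of the shared item; references to the reduction variable inside a task
// body are redirected through this address.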
5822 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5823 const OMPTaskDataTy &Data) {
5824 if (!CGF.HaveInsertPoint())
5825 return;
5827 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5828 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5829 OMPBuilder.createTaskwait(CGF.Builder);
5830 } else {
5831 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5832 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5833 auto &M = CGM.getModule();
5834 Address DependenciesArray = Address::invalid();
5835 llvm::Value *NumOfElements;
5836 std::tie(NumOfElements, DependenciesArray) =
5837 emitDependClause(CGF, Data.Dependences, Loc);
5838 llvm::Value *DepWaitTaskArgs[6];
5839 if (!Data.Dependences.empty()) {
5840 DepWaitTaskArgs[0] = UpLoc;
5841 DepWaitTaskArgs[1] = ThreadID;
5842 DepWaitTaskArgs[2] = NumOfElements;
5843 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5844 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5845 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5847 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5849 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5850 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
5851 // kmp_depend_info_t *noalias_dep_list); emitted on this path because
5852 // dependence info is specified.
5853 CGF.EmitRuntimeCall(
5854 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5855 DepWaitTaskArgs);
5857 } else {
5859 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5860 // global_tid);
5861 llvm::Value *Args[] = {UpLoc, ThreadID};
5862 // Ignore return result until untied tasks are supported.
5863 CGF.EmitRuntimeCall(
5864 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5865 Args);
5866 }
5867 }
5869 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5870 Region->emitUntiedSwitch(CGF);
5871 }
5873 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5874 OpenMPDirectiveKind InnerKind,
5875 const RegionCodeGenTy &CodeGen,
5876 bool HasCancel) {
5877 if (!CGF.HaveInsertPoint())
5878 return;
5879 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5880 InnerKind != OMPD_critical &&
5881 InnerKind != OMPD_master &&
5882 InnerKind != OMPD_masked);
5883 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5884 }
5886 namespace {
5887 enum RTCancelKind {
5888 CancelNoreq = 0,
5889 CancelParallel = 1,
5890 CancelLoop = 2,
5891 CancelSections = 3,
5892 CancelTaskgroup = 4
5893 };
5894 } // anonymous namespace
5896 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5897 RTCancelKind CancelKind = CancelNoreq;
5898 if (CancelRegion == OMPD_parallel)
5899 CancelKind = CancelParallel;
5900 else if (CancelRegion == OMPD_for)
5901 CancelKind = CancelLoop;
5902 else if (CancelRegion == OMPD_sections)
5903 CancelKind = CancelSections;
5904 else {
5905 assert(CancelRegion == OMPD_taskgroup);
5906 CancelKind = CancelTaskgroup;
5907 }
5908 return CancelKind;
5909 }
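// E.g. '#pragma omp cancel sections' maps to CancelSections (3); these values
// correspond to the kmp_int32 cncl_kind argument expected by __kmpc_cancel
// and __kmpc_cancellationpoint.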
5911 void CGOpenMPRuntime::emitCancellationPointCall(
5912 CodeGenFunction &CGF, SourceLocation Loc,
5913 OpenMPDirectiveKind CancelRegion) {
5914 if (!CGF.HaveInsertPoint())
5915 return;
5916 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5917 // global_tid, kmp_int32 cncl_kind);
5918 if (auto *OMPRegionInfo =
5919 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5920 // For 'cancellation point taskgroup', the task region info may not have a
5921 // cancel. This may instead happen in another adjacent task.
5922 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5923 llvm::Value *Args[] = {
5924 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5925 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5926 // The result is checked below; a non-zero value means cancellation was activated.
5927 llvm::Value *Result = CGF.EmitRuntimeCall(
5928 OMPBuilder.getOrCreateRuntimeFunction(
5929 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5930 Args);
5931 // if (__kmpc_cancellationpoint()) {
5932 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5933 // exit from construct;
5934 // }
5935 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5936 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5937 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5938 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5939 CGF.EmitBlock(ExitBB);
5940 if (CancelRegion == OMPD_parallel)
5941 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5942 // exit from construct;
5943 CodeGenFunction::JumpDest CancelDest =
5944 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5945 CGF.EmitBranchThroughCleanup(CancelDest);
5946 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5947 }
5948 }
5949 }
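// For reference, a directive such as
// \code
// #pragma omp cancellation point sections
// \endcode
// lowers to the __kmpc_cancellationpoint call plus the conditional exit
// emitted above; the extra cancel barrier is added only for parallel regions.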
5951 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5952 const Expr *IfCond,
5953 OpenMPDirectiveKind CancelRegion) {
5954 if (!CGF.HaveInsertPoint())
5955 return;
5956 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5957 // kmp_int32 cncl_kind);
5958 auto &M = CGM.getModule();
5959 if (auto *OMPRegionInfo =
5960 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5961 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5962 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5963 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5964 llvm::Value *Args[] = {
5965 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5966 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5967 // The result is checked below; a non-zero value means cancellation was activated.
5968 llvm::Value *Result = CGF.EmitRuntimeCall(
5969 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5970 // if (__kmpc_cancel()) {
5971 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5972 // exit from construct;
5973 // }
5974 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5975 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5976 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5977 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5978 CGF.EmitBlock(ExitBB);
5979 if (CancelRegion == OMPD_parallel)
5980 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5981 // exit from construct;
5982 CodeGenFunction::JumpDest CancelDest =
5983 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5984 CGF.EmitBranchThroughCleanup(CancelDest);
5985 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5986 };
5987 if (IfCond) {
5988 emitIfClause(CGF, IfCond, ThenGen,
5989 [](CodeGenFunction &, PrePostActionTy &) {});
5990 } else {
5991 RegionCodeGenTy ThenRCG(ThenGen);
5992 ThenRCG(CGF);
5993 }
5994 }
5995 }
5997 namespace {
5998 /// Cleanup action for uses_allocators support.
5999 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6000 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6002 public:
6003 OMPUsesAllocatorsActionTy(
6004 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6005 : Allocators(Allocators) {}
6006 void Enter(CodeGenFunction &CGF) override {
6007 if (!CGF.HaveInsertPoint())
6008 return;
6009 for (const auto &AllocatorData : Allocators) {
6010 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6011 CGF, AllocatorData.first, AllocatorData.second);
6012 }
6013 }
6014 void Exit(CodeGenFunction &CGF) override {
6015 if (!CGF.HaveInsertPoint())
6016 return;
6017 for (const auto &AllocatorData : Allocators) {
6018 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6019 AllocatorData.first);
6020 }
6021 }
6022 };
6023 } // namespace
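// For reference, this action wraps the codegen of a construct such as
// \code
// #pragma omp target uses_allocators(my_alloc(my_traits))
// \endcode
// (names illustrative): __kmpc_init_allocator is called on entry and
// __kmpc_destroy_allocator on exit for each allocator listed with traits.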
6025 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6026 const OMPExecutableDirective &D, StringRef ParentName,
6027 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6028 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6029 assert(!ParentName.empty() && "Invalid target entry parent name!");
6030 HasEmittedTargetRegion = true;
6031 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6032 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6033 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6034 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6035 if (!D.AllocatorTraits)
6036 continue;
6037 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6038 }
6039 }
6040 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6041 CodeGen.setAction(UsesAllocatorAction);
6042 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6043 IsOffloadEntry, CodeGen);
6044 }
6046 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6047 const Expr *Allocator,
6048 const Expr *AllocatorTraits) {
6049 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6050 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6051 // Use default memspace handle.
6052 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6053 llvm::Value *NumTraits = llvm::ConstantInt::get(
6054 CGF.IntTy, cast<ConstantArrayType>(
6055 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6056 ->getSize()
6057 .getLimitedValue());
6058 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6059 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6060 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6061 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6062 AllocatorTraitsLVal.getBaseInfo(),
6063 AllocatorTraitsLVal.getTBAAInfo());
6064 llvm::Value *Traits =
6065 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6067 llvm::Value *AllocatorVal =
6068 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6069 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6070 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6071 // Store to allocator.
6072 CGF.EmitVarDecl(*cast<VarDecl>(
6073 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6074 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6075 AllocatorVal =
6076 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6077 Allocator->getType(), Allocator->getExprLoc());
6078 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6079 }
6081 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6082 const Expr *Allocator) {
6083 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6084 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6085 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6086 llvm::Value *AllocatorVal =
6087 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6088 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6089 CGF.getContext().VoidPtrTy,
6090 Allocator->getExprLoc());
6091 (void)CGF.EmitRuntimeCall(
6092 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6093 OMPRTL___kmpc_destroy_allocator),
6094 {ThreadId, AllocatorVal});
6095 }
6097 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6098 const OMPExecutableDirective &D, StringRef ParentName,
6099 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6100 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6101 // Create a unique name for the entry function using the source location
6102 // information of the current target region. The name will be something like:
6104 // __omp_offloading_DD_FFFF_PP_lBB[_CC]
6106 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6107 // mangled name of the function that encloses the target region and BB is the
6108 // line number of the target region. CC is a count added when more than one
6109 // region is located at the same location.
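// For instance, a target region at line 27 of a function 'foo' might be named
// __omp_offloading_801_4d2_foo_l27 (the device and file IDs here are
// illustrative; the real values are derived from the source file).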
6111 const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
6112 !CGM.getLangOpts().OpenMPOffloadMandatory;
6113 auto EntryInfo =
6114 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName);
6116 SmallString<64> EntryFnName;
6117 OffloadEntriesInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
6119 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6121 CodeGenFunction CGF(CGM, true);
6122 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6123 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6125 if (BuildOutlinedFn)
6126 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6128 // If this target outline function is not an offload entry, we don't need to
6129 // register it.
6130 if (!IsOffloadEntry)
6131 return;
6133 // The target region ID is used by the runtime library to identify the current
6134 // target region, so it only has to be unique and not necessarily point to
6135 // anything. It could be the pointer to the outlined function that implements
6136 // the target region, but we aren't using that so that the compiler doesn't
6137 // need to keep it alive, and can therefore inline the host function if proven
6138 // worthwhile during optimization. On the other hand, if emitting code for the
6139 // device, the ID has to be the function address so that it can be retrieved
6140 // from the offloading entry and launched by the runtime library. We also mark
6141 // the outlined function with external linkage in case we are emitting code
6142 // for the device, because these functions will be entry points to the device.
6144 if (CGM.getLangOpts().OpenMPIsDevice) {
6145 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6146 OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
6147 OutlinedFn->setDSOLocal(false);
6148 OutlinedFn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
6149 if (CGM.getTriple().isAMDGCN())
6150 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6151 } else {
6152 std::string Name = getName({EntryFnName, "region_id"});
6153 OutlinedFnID = new llvm::GlobalVariable(
6154 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6155 llvm::GlobalValue::WeakAnyLinkage,
6156 llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6157 }
6159 // If we do not allow host fallback we still need a named address to use.
6160 llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
6161 if (!BuildOutlinedFn) {
6162 assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
6163 "Named kernel already exists?");
6164 TargetRegionEntryAddr = new llvm::GlobalVariable(
6165 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6166 llvm::GlobalValue::InternalLinkage,
6167 llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
6168 }
6170 // Register the information for the entry associated with this target region.
6171 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6172 EntryInfo, TargetRegionEntryAddr, OutlinedFnID,
6173 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion,
6174 CGM.getLangOpts().OpenMPIsDevice);
6176 // Add NumTeams and ThreadLimit attributes to the outlined GPU function
6177 int32_t DefaultValTeams = -1;
6178 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6179 if (DefaultValTeams > 0 && OutlinedFn) {
6180 OutlinedFn->addFnAttr("omp_target_num_teams",
6181 std::to_string(DefaultValTeams));
6182 }
6183 int32_t DefaultValThreads = -1;
6184 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6185 if (DefaultValThreads > 0 && OutlinedFn) {
6186 OutlinedFn->addFnAttr("omp_target_thread_limit",
6187 std::to_string(DefaultValThreads));
6188 }
6190 if (BuildOutlinedFn)
6191 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6192 }
6194 /// Checks if the expression is constant or does not have non-trivial function
6195 /// calls.
6196 static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6197 // We can skip constant expressions.
6198 // We can skip expressions with trivial calls or simple expressions.
6199 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6200 !E->hasNonTrivialCall(Ctx)) &&
6201 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6202 }
6204 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6205 const Stmt *Body) {
6206 const Stmt *Child = Body->IgnoreContainers();
6207 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6208 Child = nullptr;
6209 for (const Stmt *S : C->body()) {
6210 if (const auto *E = dyn_cast<Expr>(S)) {
6211 if (isTrivial(Ctx, E))
6212 continue;
6213 }
6214 // Some of the statements can be ignored.
6215 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6216 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6217 continue;
6218 // Analyze declarations.
6219 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6220 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6221 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6222 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6223 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6224 isa<UsingDirectiveDecl>(D) ||
6225 isa<OMPDeclareReductionDecl>(D) ||
6226 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6227 return true;
6228 const auto *VD = dyn_cast<VarDecl>(D);
6229 if (!VD)
6230 return false;
6231 return VD->hasGlobalStorage() || !VD->isUsed();
6232 }))
6233 continue;
6234 }
6235 // Found multiple children - cannot get the one child only.
6236 if (Child)
6237 return nullptr;
6238 Child = S;
6239 }
6240 if (Child)
6241 Child = Child->IgnoreContainers();
6242 }
6243 return Child;
6244 }
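// E.g. a captured body of the form '{ ; int unused; #pragma omp teams ... }'
// still reports the teams directive as its single child, because null
// statements, trivial expressions, and unused local declarations are all
// skipped above.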
6246 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6247 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6248 int32_t &DefaultVal) {
6250 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6251 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6252 "Expected target-based executable directive.");
6253 switch (DirectiveKind) {
6254 case OMPD_target: {
6255 const auto *CS = D.getInnermostCapturedStmt();
6256 const auto *Body =
6257 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6258 const Stmt *ChildStmt =
6259 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6260 if (const auto *NestedDir =
6261 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6262 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6263 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6264 const Expr *NumTeams =
6265 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6266 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6267 if (auto Constant =
6268 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6269 DefaultVal = Constant->getExtValue();
6270 return NumTeams;
6271 }
6272 DefaultVal = 0;
6273 return nullptr;
6274 }
6275 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6276 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6277 DefaultVal = 1;
6278 return nullptr;
6279 }
6280 DefaultVal = 1;
6281 return nullptr;
6282 }
6283 // A value of -1 indicates that no nested teams region was found.
6284 DefaultVal = -1;
6285 return nullptr;
6286 }
6287 case OMPD_target_teams:
6288 case OMPD_target_teams_distribute:
6289 case OMPD_target_teams_distribute_simd:
6290 case OMPD_target_teams_distribute_parallel_for:
6291 case OMPD_target_teams_distribute_parallel_for_simd: {
6292 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6293 const Expr *NumTeams =
6294 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6295 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6296 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6297 DefaultVal = Constant->getExtValue();
6298 return NumTeams;
6299 }
6300 DefaultVal = 0;
6301 return nullptr;
6302 }
6303 case OMPD_target_parallel:
6304 case OMPD_target_parallel_for:
6305 case OMPD_target_parallel_for_simd:
6306 case OMPD_target_simd:
6307 DefaultVal = 1;
6308 return nullptr;
6309 case OMPD_parallel:
6310 case OMPD_for:
6311 case OMPD_parallel_for:
6312 case OMPD_parallel_master:
6313 case OMPD_parallel_sections:
6314 case OMPD_for_simd:
6315 case OMPD_parallel_for_simd:
6316 case OMPD_cancel:
6317 case OMPD_cancellation_point:
6318 case OMPD_ordered:
6319 case OMPD_threadprivate:
6320 case OMPD_allocate:
6321 case OMPD_task:
6322 case OMPD_simd:
6323 case OMPD_tile:
6324 case OMPD_unroll:
6325 case OMPD_sections:
6326 case OMPD_section:
6327 case OMPD_single:
6328 case OMPD_master:
6329 case OMPD_critical:
6330 case OMPD_taskyield:
6331 case OMPD_barrier:
6332 case OMPD_taskwait:
6333 case OMPD_taskgroup:
6334 case OMPD_atomic:
6335 case OMPD_flush:
6336 case OMPD_depobj:
6337 case OMPD_scan:
6338 case OMPD_teams:
6339 case OMPD_target_data:
6340 case OMPD_target_exit_data:
6341 case OMPD_target_enter_data:
6342 case OMPD_distribute:
6343 case OMPD_distribute_simd:
6344 case OMPD_distribute_parallel_for:
6345 case OMPD_distribute_parallel_for_simd:
6346 case OMPD_teams_distribute:
6347 case OMPD_teams_distribute_simd:
6348 case OMPD_teams_distribute_parallel_for:
6349 case OMPD_teams_distribute_parallel_for_simd:
6350 case OMPD_target_update:
6351 case OMPD_declare_simd:
6352 case OMPD_declare_variant:
6353 case OMPD_begin_declare_variant:
6354 case OMPD_end_declare_variant:
6355 case OMPD_declare_target:
6356 case OMPD_end_declare_target:
6357 case OMPD_declare_reduction:
6358 case OMPD_declare_mapper:
6359 case OMPD_taskloop:
6360 case OMPD_taskloop_simd:
6361 case OMPD_master_taskloop:
6362 case OMPD_master_taskloop_simd:
6363 case OMPD_parallel_master_taskloop:
6364 case OMPD_parallel_master_taskloop_simd:
6365 case OMPD_requires:
6366 case OMPD_metadirective:
6367 case OMPD_unknown:
6368 break;
6369 default:
6370 break;
6371 }
6372 llvm_unreachable("Unexpected directive kind.");
6373 }
6375 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6376 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6377 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6378 "Clauses associated with the teams directive expected to be emitted "
6379 "only for the host!");
6380 CGBuilderTy &Bld = CGF.Builder;
6381 int32_t DefaultNT = -1;
6382 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6383 if (NumTeams != nullptr) {
6384 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6386 switch (DirectiveKind) {
6387 case OMPD_target: {
6388 const auto *CS = D.getInnermostCapturedStmt();
6389 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6390 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6391 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6392 /*IgnoreResultAssign*/ true);
6393 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6394 /*isSigned=*/true);
6395 }
6396 case OMPD_target_teams:
6397 case OMPD_target_teams_distribute:
6398 case OMPD_target_teams_distribute_simd:
6399 case OMPD_target_teams_distribute_parallel_for:
6400 case OMPD_target_teams_distribute_parallel_for_simd: {
6401 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6402 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6403 /*IgnoreResultAssign*/ true);
6404 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6405 /*isSigned=*/true);
6406 }
6407 default:
6408 break;
6409 }
6410 }
6412 return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
6413 }
6415 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6416 llvm::Value *DefaultThreadLimitVal) {
6417 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6418 CGF.getContext(), CS->getCapturedStmt());
6419 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6420 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6421 llvm::Value *NumThreads = nullptr;
6422 llvm::Value *CondVal = nullptr;
6423 // Handle the if clause. If present, the number of threads is calculated
6424 // as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6425 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6426 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6427 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6428 const OMPIfClause *IfClause = nullptr;
6429 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6430 if (C->getNameModifier() == OMPD_unknown ||
6431 C->getNameModifier() == OMPD_parallel) {
6432 IfClause = C;
6433 break;
6434 }
6435 }
6436 if (IfClause) {
6437 const Expr *Cond = IfClause->getCondition();
6438 bool Result;
6439 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6440 if (!Result)
6441 return CGF.Builder.getInt32(1);
6442 } else {
6443 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6444 if (const auto *PreInit =
6445 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6446 for (const auto *I : PreInit->decls()) {
6447 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6448 CGF.EmitVarDecl(cast<VarDecl>(*I));
6449 } else {
6450 CodeGenFunction::AutoVarEmission Emission =
6451 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6452 CGF.EmitAutoVarCleanups(Emission);
6453 }
6454 }
6455 }
6456 CondVal = CGF.EvaluateExprAsBool(Cond);
6457 }
6458 }
6459 }
6460 // Check the value of the num_threads clause only if the if clause was not
6461 // specified or does not evaluate to false.
6462 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6463 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6464 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6465 const auto *NumThreadsClause =
6466 Dir->getSingleClause<OMPNumThreadsClause>();
6467 CodeGenFunction::LexicalScope Scope(
6468 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6469 if (const auto *PreInit =
6470 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6471 for (const auto *I : PreInit->decls()) {
6472 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6473 CGF.EmitVarDecl(cast<VarDecl>(*I));
6474 } else {
6475 CodeGenFunction::AutoVarEmission Emission =
6476 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6477 CGF.EmitAutoVarCleanups(Emission);
6478 }
6479 }
6480 }
6481 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6482 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6483 /*isSigned=*/false);
6484 if (DefaultThreadLimitVal)
6485 NumThreads = CGF.Builder.CreateSelect(
6486 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6487 DefaultThreadLimitVal, NumThreads);
6488 } else {
6489 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6490 : CGF.Builder.getInt32(0);
6491 }
6492 // Process condition of the if clause.
6493 if (CondVal) {
6494 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6495 CGF.Builder.getInt32(1));
6496 }
6497 return NumThreads;
6498 }
6499 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6500 return CGF.Builder.getInt32(1);
6501 return DefaultThreadLimitVal;
6502 }
6503 return DefaultThreadLimitVal ? DefaultThreadLimitVal
6504 : CGF.Builder.getInt32(0);
6505 }
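// Worked example (illustrative): for '#pragma omp parallel num_threads(8)
// if(flag)' nested directly in the target region, the helper above produces
// flag ? 8 : 1, with 8 first clamped to DefaultThreadLimitVal when a thread
// limit is known, and 0 standing for "no explicit limit".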
6507 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6508 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6509 int32_t &DefaultVal) {
6510 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6511 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6512 "Expected target-based executable directive.");
6514 switch (DirectiveKind) {
6515 case OMPD_target:
6516 // Teams have no thread_limit clause.
6517 return nullptr;
6518 case OMPD_target_teams:
6519 case OMPD_target_teams_distribute:
6520 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6521 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6522 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6523 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6524 if (auto Constant =
6525 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6526 DefaultVal = Constant->getExtValue();
6527 return ThreadLimit;
6528 }
6529 return nullptr;
6530 case OMPD_target_parallel:
6531 case OMPD_target_parallel_for:
6532 case OMPD_target_parallel_for_simd:
6533 case OMPD_target_teams_distribute_parallel_for:
6534 case OMPD_target_teams_distribute_parallel_for_simd: {
6535 Expr *ThreadLimit = nullptr;
6536 Expr *NumThreads = nullptr;
6537 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6538 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6539 ThreadLimit = ThreadLimitClause->getThreadLimit();
6540 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6541 if (auto Constant =
6542 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6543 DefaultVal = Constant->getExtValue();
6544 }
6545 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6546 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6547 NumThreads = NumThreadsClause->getNumThreads();
6548 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6549 if (auto Constant =
6550 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6551 if (Constant->getExtValue() < DefaultVal) {
6552 DefaultVal = Constant->getExtValue();
6553 ThreadLimit = NumThreads;
6554 }
6555 }
6556 }
6557 }
6558 return ThreadLimit;
6559 }
6560 case OMPD_target_teams_distribute_simd:
6561 case OMPD_target_simd:
6562 DefaultVal = 1;
6563 return nullptr;
6564 case OMPD_parallel:
6565 case OMPD_for:
6566 case OMPD_parallel_for:
6567 case OMPD_parallel_master:
6568 case OMPD_parallel_sections:
6569 case OMPD_for_simd:
6570 case OMPD_parallel_for_simd:
6571 case OMPD_cancel:
6572 case OMPD_cancellation_point:
6573 case OMPD_ordered:
6574 case OMPD_threadprivate:
6575 case OMPD_allocate:
6576 case OMPD_task:
6577 case OMPD_simd:
6578 case OMPD_tile:
6579 case OMPD_unroll:
6580 case OMPD_sections:
6581 case OMPD_section:
6582 case OMPD_single:
6583 case OMPD_master:
6584 case OMPD_critical:
6585 case OMPD_taskyield:
6586 case OMPD_barrier:
6587 case OMPD_taskwait:
6588 case OMPD_taskgroup:
6589 case OMPD_atomic:
6590 case OMPD_flush:
6591 case OMPD_depobj:
6592 case OMPD_scan:
6593 case OMPD_teams:
6594 case OMPD_target_data:
6595 case OMPD_target_exit_data:
6596 case OMPD_target_enter_data:
6597 case OMPD_distribute:
6598 case OMPD_distribute_simd:
6599 case OMPD_distribute_parallel_for:
6600 case OMPD_distribute_parallel_for_simd:
6601 case OMPD_teams_distribute:
6602 case OMPD_teams_distribute_simd:
6603 case OMPD_teams_distribute_parallel_for:
6604 case OMPD_teams_distribute_parallel_for_simd:
6605 case OMPD_target_update:
6606 case OMPD_declare_simd:
6607 case OMPD_declare_variant:
6608 case OMPD_begin_declare_variant:
6609 case OMPD_end_declare_variant:
6610 case OMPD_declare_target:
6611 case OMPD_end_declare_target:
6612 case OMPD_declare_reduction:
6613 case OMPD_declare_mapper:
6614 case OMPD_taskloop:
6615 case OMPD_taskloop_simd:
6616 case OMPD_master_taskloop:
6617 case OMPD_master_taskloop_simd:
6618 case OMPD_parallel_master_taskloop:
6619 case OMPD_parallel_master_taskloop_simd:
6620 case OMPD_requires:
6621 case OMPD_unknown:
6622 break;
6623 default:
6624 break;
6625 }
6626 llvm_unreachable("Unsupported directive kind.");
6627 }
6629 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6630 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6631 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6632 "Clauses associated with the teams directive expected to be emitted "
6633 "only for the host!");
6634 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6635 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6636 "Expected target-based executable directive.");
6637 CGBuilderTy &Bld = CGF.Builder;
6638 llvm::Value *ThreadLimitVal = nullptr;
6639 llvm::Value *NumThreadsVal = nullptr;
6640 switch (DirectiveKind) {
6641 case OMPD_target: {
6642 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6643 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6644 return NumThreads;
6645 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6646 CGF.getContext(), CS->getCapturedStmt());
6647 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6648 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6649 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6650 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6651 const auto *ThreadLimitClause =
6652 Dir->getSingleClause<OMPThreadLimitClause>();
6653 CodeGenFunction::LexicalScope Scope(
6654 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6655 if (const auto *PreInit =
6656 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6657 for (const auto *I : PreInit->decls()) {
6658 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6659 CGF.EmitVarDecl(cast<VarDecl>(*I));
6660 } else {
6661 CodeGenFunction::AutoVarEmission Emission =
6662 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6663 CGF.EmitAutoVarCleanups(Emission);
6664 }
6665 }
6666 }
6667 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6668 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6669 ThreadLimitVal =
6670 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6671 }
6672 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6673 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6674 CS = Dir->getInnermostCapturedStmt();
6675 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6676 CGF.getContext(), CS->getCapturedStmt());
6677 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6678 }
6679 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6680 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6681 CS = Dir->getInnermostCapturedStmt();
6682 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6683 return NumThreads;
6684 }
6685 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6686 return Bld.getInt32(1);
6687 }
6688 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6689 }
6690 case OMPD_target_teams: {
6691 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6692 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6693 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6694 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6695 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6696 ThreadLimitVal =
6697 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6698 }
6699 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6700 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6701 return NumThreads;
6702 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6703 CGF.getContext(), CS->getCapturedStmt());
6704 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6705 if (Dir->getDirectiveKind() == OMPD_distribute) {
6706 CS = Dir->getInnermostCapturedStmt();
6707 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6708 return NumThreads;
6709 }
6710 }
6711 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6712 }
6713 case OMPD_target_teams_distribute:
6714 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6715 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6716 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6717 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6718 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6719 ThreadLimitVal =
6720 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6721 }
6722 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6723 case OMPD_target_parallel:
6724 case OMPD_target_parallel_for:
6725 case OMPD_target_parallel_for_simd:
6726 case OMPD_target_teams_distribute_parallel_for:
6727 case OMPD_target_teams_distribute_parallel_for_simd: {
6728 llvm::Value *CondVal = nullptr;
6729 // Handle the if clause. If present, the number of threads is calculated
6730 // as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6731 if (D.hasClausesOfKind<OMPIfClause>()) {
6732 const OMPIfClause *IfClause = nullptr;
6733 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6734 if (C->getNameModifier() == OMPD_unknown ||
6735 C->getNameModifier() == OMPD_parallel) {
6736 IfClause = C;
6737 break;
6738 }
6739 }
6740 if (IfClause) {
6741 const Expr *Cond = IfClause->getCondition();
6742 bool Result;
6743 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6744 if (!Result)
6745 return Bld.getInt32(1);
6746 } else {
6747 CodeGenFunction::RunCleanupsScope Scope(CGF);
6748 CondVal = CGF.EvaluateExprAsBool(Cond);
6749 }
6750 }
6751 }
6752 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6753 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6754 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6755 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6756 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6757 ThreadLimitVal =
6758 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6759 }
6760 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6761 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6762 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6763 llvm::Value *NumThreads = CGF.EmitScalarExpr(
6764 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6765 NumThreadsVal =
6766 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6767 ThreadLimitVal = ThreadLimitVal
6768 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6769 ThreadLimitVal),
6770 NumThreadsVal, ThreadLimitVal)
6771 : NumThreadsVal;
6772 }
6773 if (!ThreadLimitVal)
6774 ThreadLimitVal = Bld.getInt32(0);
6775 if (CondVal)
6776 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6777 return ThreadLimitVal;
6778 }
6779 case OMPD_target_teams_distribute_simd:
6780 case OMPD_target_simd:
6781 return Bld.getInt32(1);
6782 case OMPD_parallel:
6783 case OMPD_for:
6784 case OMPD_parallel_for:
6785 case OMPD_parallel_master:
6786 case OMPD_parallel_sections:
6787 case OMPD_for_simd:
6788 case OMPD_parallel_for_simd:
6789 case OMPD_cancel:
6790 case OMPD_cancellation_point:
6791 case OMPD_ordered:
6792 case OMPD_threadprivate:
6793 case OMPD_allocate:
6794 case OMPD_task:
6795 case OMPD_simd:
6796 case OMPD_tile:
6797 case OMPD_unroll:
6798 case OMPD_sections:
6799 case OMPD_section:
6800 case OMPD_single:
6801 case OMPD_master:
6802 case OMPD_critical:
6803 case OMPD_taskyield:
6804 case OMPD_barrier:
6805 case OMPD_taskwait:
6806 case OMPD_taskgroup:
6807 case OMPD_atomic:
6808 case OMPD_flush:
6809 case OMPD_depobj:
6810 case OMPD_scan:
6811 case OMPD_teams:
6812 case OMPD_target_data:
6813 case OMPD_target_exit_data:
6814 case OMPD_target_enter_data:
6815 case OMPD_distribute:
6816 case OMPD_distribute_simd:
6817 case OMPD_distribute_parallel_for:
6818 case OMPD_distribute_parallel_for_simd:
6819 case OMPD_teams_distribute:
6820 case OMPD_teams_distribute_simd:
6821 case OMPD_teams_distribute_parallel_for:
6822 case OMPD_teams_distribute_parallel_for_simd:
6823 case OMPD_target_update:
6824 case OMPD_declare_simd:
6825 case OMPD_declare_variant:
6826 case OMPD_begin_declare_variant:
6827 case OMPD_end_declare_variant:
6828 case OMPD_declare_target:
6829 case OMPD_end_declare_target:
6830 case OMPD_declare_reduction:
6831 case OMPD_declare_mapper:
6832 case OMPD_taskloop:
6833 case OMPD_taskloop_simd:
6834 case OMPD_master_taskloop:
6835 case OMPD_master_taskloop_simd:
6836 case OMPD_parallel_master_taskloop:
6837 case OMPD_parallel_master_taskloop_simd:
6838 case OMPD_requires:
6839 case OMPD_metadirective:
6840 case OMPD_unknown:
6841 break;
6842 default:
6843 break;
6844 }
6845 llvm_unreachable("Unsupported directive kind.");
6846 }
6848 namespace {
6849 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6851 // Utility to handle information from clauses associated with a given
6852 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6853 // It provides a convenient interface to obtain the information and generate
6854 // code for that information.
6855 class MappableExprsHandler {
6856 public:
6857 /// Values for bit flags used to specify the mapping type for
6858 /// offloading.
6859 enum OpenMPOffloadMappingFlags : uint64_t {
6860 /// No flags
6861 OMP_MAP_NONE = 0x0,
6862 /// Allocate memory on the device and move data from host to device.
6863 OMP_MAP_TO = 0x01,
6864 /// Allocate memory on the device and move data from device to host.
6865 OMP_MAP_FROM = 0x02,
6866 /// Always perform the requested mapping action on the element, even
6867 /// if it was already mapped before.
6868 OMP_MAP_ALWAYS = 0x04,
6869 /// Delete the element from the device environment, ignoring the
6870 /// current reference count associated with the element.
6871 OMP_MAP_DELETE = 0x08,
6872 /// The element being mapped is a pointer-pointee pair; both the
6873 /// pointer and the pointee should be mapped.
6874 OMP_MAP_PTR_AND_OBJ = 0x10,
6876 /// This flag signals that the base address of an entry should be
6876 /// passed to the target kernel as an argument.
6877 OMP_MAP_TARGET_PARAM = 0x20,
6878 /// Signal that the runtime library has to return the device pointer
6879 /// in the current position for the data being mapped. Used when we have the
6880 /// use_device_ptr or use_device_addr clause.
6881 OMP_MAP_RETURN_PARAM = 0x40,
6882 /// This flag signals that the reference being passed is a pointer to
6883 /// private data.
6884 OMP_MAP_PRIVATE = 0x80,
6885 /// Pass the element to the device by value.
6886 OMP_MAP_LITERAL = 0x100,
6887 /// Implicit map
6888 OMP_MAP_IMPLICIT = 0x200,
6889 /// Close is a hint to the runtime to allocate memory close to
6890 /// the target device.
6891 OMP_MAP_CLOSE = 0x400,
6892 /// 0x800 is reserved for compatibility with XLC.
6893 /// Produce a runtime error if the data is not already allocated.
6894 OMP_MAP_PRESENT = 0x1000,
6895 // Increment and decrement a separate reference counter so that the data
6896 // cannot be unmapped within the associated region. Thus, this flag is
6897 // intended to be used on 'target' and 'target data' directives because they
6898 // are inherently structured. It is not intended to be used on 'target
6899 // enter data' and 'target exit data' directives because they are inherently
6900 // dynamic.
6901 // This is an OpenMP extension for the sake of OpenACC support.
6902 OMP_MAP_OMPX_HOLD = 0x2000,
6903 /// Signal that the runtime library should use args as an array of
6904 /// descriptor_dim pointers and use args_size as dims. Used when we have
6905 /// non-contiguous list items in the target update directive.
6906 OMP_MAP_NON_CONTIG = 0x100000000000,
6907 /// The 16 MSBs of the flags indicate whether the entry is member of some
6908 /// struct/class.
6909 OMP_MAP_MEMBER_OF = 0xffff000000000000,
6910 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
6911 };
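// For reference, these flags combine bitwise: an item from
// 'map(always, tofrom: x)' is emitted as OMP_MAP_TO | OMP_MAP_FROM |
// OMP_MAP_ALWAYS, with OMP_MAP_TARGET_PARAM added when the item is also
// passed to the kernel as an argument.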
6913 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6914 static unsigned getFlagMemberOffset() {
6915 unsigned Offset = 0;
6916 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
6917 Remain = Remain >> 1)
6918 Offset++;
6919 return Offset;
6920 }
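// With OMP_MAP_MEMBER_OF == 0xffff000000000000 this returns 48, i.e. the
// member position is carried in the 16 most-significant bits of the flags.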
6922 /// Class that holds debugging information for a data mapping to be passed to
6923 /// the runtime library.
6924 class MappingExprInfo {
6925 /// The variable declaration used for the data mapping.
6926 const ValueDecl *MapDecl = nullptr;
6927 /// The original expression used in the map clause, or null if there is
6928 /// none.
6929 const Expr *MapExpr = nullptr;
6931 public:
6932 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6933 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6935 const ValueDecl *getMapDecl() const { return MapDecl; }
6936 const Expr *getMapExpr() const { return MapExpr; }
6937 };
6939 /// Class that associates information with a base pointer to be passed to the
6940 /// runtime library.
6941 class BasePointerInfo {
6942 /// The base pointer.
6943 llvm::Value *Ptr = nullptr;
6944 /// The base declaration that refers to this device pointer, or null if
6945 /// there is none.
6946 const ValueDecl *DevPtrDecl = nullptr;
6948 public:
6949 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6950 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
6951 llvm::Value *operator*() const { return Ptr; }
6952 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
6953 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6954 };
6956 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6957 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
6958 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
6959 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
6960 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
6961 using MapDimArrayTy = SmallVector<uint64_t, 4>;
6962 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
6964 /// This structure contains combined information generated for mappable
6965 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6966 /// mappers, and non-contiguous information.
6967 struct MapCombinedInfoTy {
6968 struct StructNonContiguousInfo {
6969 bool IsNonContiguous = false;
6970 MapDimArrayTy Dims;
6971 MapNonContiguousArrayTy Offsets;
6972 MapNonContiguousArrayTy Counts;
6973 MapNonContiguousArrayTy Strides;
6974 };
6975 MapExprsArrayTy Exprs;
6976 MapBaseValuesArrayTy BasePointers;
6977 MapValuesArrayTy Pointers;
6978 MapValuesArrayTy Sizes;
6979 MapFlagsArrayTy Types;
6980 MapMappersArrayTy Mappers;
6981 StructNonContiguousInfo NonContigInfo;
6983 /// Append arrays in \a CurInfo.
6984 void append(MapCombinedInfoTy &CurInfo) {
6985 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6986 BasePointers.append(CurInfo.BasePointers.begin(),
6987 CurInfo.BasePointers.end());
6988 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
6989 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
6990 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
6991 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6992 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
6993 CurInfo.NonContigInfo.Dims.end());
6994 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
6995 CurInfo.NonContigInfo.Offsets.end());
6996 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
6997 CurInfo.NonContigInfo.Counts.end());
6998 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
6999 CurInfo.NonContigInfo.Strides.end());
7000 }
7001 };
7003 /// Map between a struct and its lowest & highest elements which have been
7004 /// mapped.
7005 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7006 /// HE(FieldIndex, Pointer)}
7007 struct StructRangeInfoTy {
7008 MapCombinedInfoTy PreliminaryMapData;
7009 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7010 0, Address::invalid()};
7011 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7012 0, Address::invalid()};
7013 Address Base = Address::invalid();
7014 Address LB = Address::invalid();
7015 bool IsArraySection = false;
7016 bool HasCompleteRecord = false;
7017 };
7019 private:
7020 /// Kind that defines how a device pointer has to be returned.
7021 struct MapInfo {
7022 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7023 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7024 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7025 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7026 bool ReturnDevicePointer = false;
7027 bool IsImplicit = false;
7028 const ValueDecl *Mapper = nullptr;
7029 const Expr *VarRef = nullptr;
7030 bool ForDeviceAddr = false;
7032 MapInfo() = default;
7033 MapInfo(
7034 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7035 OpenMPMapClauseKind MapType,
7036 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7037 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7038 bool ReturnDevicePointer, bool IsImplicit,
7039 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7040 bool ForDeviceAddr = false)
7041 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7042 MotionModifiers(MotionModifiers),
7043 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7044 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7045 };
7047 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7048 /// member and there is no map information about it, then emission of that
7049 /// entry is deferred until the whole struct has been processed.
7050 struct DeferredDevicePtrEntryTy {
7051 const Expr *IE = nullptr;
7052 const ValueDecl *VD = nullptr;
7053 bool ForDeviceAddr = false;
7055 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7056 bool ForDeviceAddr)
7057 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7058 };
7060 /// The target directive from where the mappable clauses were extracted. It
7061 /// is either an executable directive or a user-defined mapper directive.
7062 llvm::PointerUnion<const OMPExecutableDirective *,
7063 const OMPDeclareMapperDecl *>
7064 CurDir;
7066 /// Function the directive is being generated for.
7067 CodeGenFunction &CGF;
7069 /// Set of all first private variables in the current directive.
7070 /// bool data is set to true if the variable is implicitly marked as
7071 /// firstprivate, false otherwise.
7072 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7074 /// Map between device pointer declarations and their expression components.
7075 /// The key value for declarations in 'this' is null.
7076 llvm::DenseMap<
7077 const ValueDecl *,
7078 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7079 DevPointersMap;
7081 /// Map between device addr declarations and their expression components.
7082 /// The key value for declarations in 'this' is null.
7083 llvm::DenseMap<
7084 const ValueDecl *,
7085 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7086 HasDevAddrsMap;
7088 /// Map between lambda declarations and their map type.
7089 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7091 llvm::Value *getExprTypeSize(const Expr *E) const {
7092 QualType ExprTy = E->getType().getCanonicalType();
7094 // Calculate the size for an array shaping expression.
7095 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7096 llvm::Value *Size =
7097 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7098 for (const Expr *SE : OAE->getDimensions()) {
7099 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7100 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7101 CGF.getContext().getSizeType(),
7102 SE->getExprLoc());
7103 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7105 return Size;
7108 // Reference types are ignored for mapping purposes.
7109 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7110 ExprTy = RefTy->getPointeeType().getCanonicalType();
7112 // Given that an array section is considered a built-in type, we need to
7113 // do the calculation based on the length of the section instead of relying
7114 // on CGF.getTypeSize(E->getType()).
7115 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7116 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7117 OAE->getBase()->IgnoreParenImpCasts())
7118 .getCanonicalType();
7120 // If there is no length associated with the expression and the lower bound
7121 // is not specified either, that means we are using the whole length of the
7122 // base.
7123 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7124 !OAE->getLowerBound())
7125 return CGF.getTypeSize(BaseTy);
7127 llvm::Value *ElemSize;
7128 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7129 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7130 } else {
7131 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7132 assert(ATy && "Expecting array type if not a pointer type.");
7133 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7136 // If we don't have a length at this point, that is because we have an
7137 // array section with a single element.
7138 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7139 return ElemSize;
7141 if (const Expr *LenExpr = OAE->getLength()) {
7142 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7143 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7144 CGF.getContext().getSizeType(),
7145 LenExpr->getExprLoc());
7146 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7148 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7149 OAE->getLowerBound() && "expected array_section[lb:].");
7150 // Size = sizeof(base type) - lb * sizeof(element type), clamped below at 0.
7151 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7152 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7153 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7154 CGF.getContext().getSizeType(),
7155 OAE->getLowerBound()->getExprLoc());
7156 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7157 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7158 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7159 LengthVal = CGF.Builder.CreateSelect(
7160 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7161 return LengthVal;
7163 return CGF.getTypeSize(ExprTy);
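//
// Worked example (sketch, assuming a 4-byte int): for `int a[100];` the
// section a[5:] carries no length, so the last branch above computes
//   Size = sizeof(a) - 5 * sizeof(int) = 400 - 20 = 380 bytes,
// with the select yielding 0 instead if the subtraction would underflow.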
7166 /// Return the corresponding bits for a given map clause type and its
7167 /// modifiers. Add a flag marking the map as a pointer if requested. Add a
7168 /// flag marking the map as the first one of a series of maps that relate to
7169 /// the same map expression.
7170 OpenMPOffloadMappingFlags getMapTypeBits(
7171 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7172 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7173 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7174 OpenMPOffloadMappingFlags Bits =
7175 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7176 switch (MapType) {
7177 case OMPC_MAP_alloc:
7178 case OMPC_MAP_release:
7179 // alloc and release are the default behavior in the runtime library, i.e.
7180 // if we don't pass any bits, alloc/release is what the runtime is going
7181 // to do. Therefore, we don't need to signal anything for these two map
7182 // types.
7183 break;
7184 case OMPC_MAP_to:
7185 Bits |= OMP_MAP_TO;
7186 break;
7187 case OMPC_MAP_from:
7188 Bits |= OMP_MAP_FROM;
7189 break;
7190 case OMPC_MAP_tofrom:
7191 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7192 break;
7193 case OMPC_MAP_delete:
7194 Bits |= OMP_MAP_DELETE;
7195 break;
7196 case OMPC_MAP_unknown:
7197 llvm_unreachable("Unexpected map type!");
7199 if (AddPtrFlag)
7200 Bits |= OMP_MAP_PTR_AND_OBJ;
7201 if (AddIsTargetParamFlag)
7202 Bits |= OMP_MAP_TARGET_PARAM;
7203 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7204 Bits |= OMP_MAP_ALWAYS;
7205 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7206 Bits |= OMP_MAP_CLOSE;
7207 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7208 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7209 Bits |= OMP_MAP_PRESENT;
7210 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7211 Bits |= OMP_MAP_OMPX_HOLD;
7212 if (IsNonContiguous)
7213 Bits |= OMP_MAP_NON_CONTIG;
7214 return Bits;
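//
// Example (sketch): map(always, close, tofrom: x) yields
//   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE,
// with OMP_MAP_PTR_AND_OBJ and/or OMP_MAP_TARGET_PARAM added only when the
// caller passes AddPtrFlag/AddIsTargetParamFlag.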
7217 /// Return true if the provided expression is a final array section. A
7218 /// final array section is one whose length can't be proved to be one.
7219 bool isFinalArraySectionExpression(const Expr *E) const {
7220 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7222 // It is not an array section and therefore not a unity-size one.
7223 if (!OASE)
7224 return false;
7226 // An array section with no colon always refers to a single element.
7227 if (OASE->getColonLocFirst().isInvalid())
7228 return false;
7230 const Expr *Length = OASE->getLength();
7232 // If we don't have a length we have to check if the array has size 1
7233 // for this dimension. Also, we should always expect a length if the
7234 // base type is a pointer.
7235 if (!Length) {
7236 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7237 OASE->getBase()->IgnoreParenImpCasts())
7238 .getCanonicalType();
7239 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7240 return ATy->getSize().getSExtValue() != 1;
7241 // If we don't have a constant dimension length, we have to consider
7242 // the current section as having any size, so it is not necessarily
7243 // unitary. If it happens to be of unity size, that's the user's fault.
7244 return true;
7247 // Check if the length evaluates to 1.
7248 Expr::EvalResult Result;
7249 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7250 return true; // Can have more than size 1.
7252 llvm::APSInt ConstLength = Result.Val.getInt();
7253 return ConstLength.getSExtValue() != 1;
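//
// Examples (sketch): a[0:1] is not final (its length provably evaluates to
// 1); a[0:n] is final (the length cannot be proved to be one); and for
// `int a[5];` the section a[2:] is also final, because the constant
// dimension size is not 1.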
7256 /// Generate the base pointers, section pointers, sizes, map type bits, and
7257 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7258 /// map type, map or motion modifiers, and expression components.
7259 /// \a IsFirstComponent should be set to true if the provided set of
7260 /// components is the first associated with a capture.
7261 void generateInfoForComponentList(
7262 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7263 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7264 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7265 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7266 bool IsFirstComponentList, bool IsImplicit,
7267 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7268 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7269 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7270 OverlappedElements = llvm::None) const {
7271 // The following summarizes what has to be generated for each map and the
7272 // types below. The generated information is expressed in this order:
7273 // base pointer, section pointer, size, flags
7274 // (to add to the ones that come from the map type and modifier).
7276 // double d;
7277 // int i[100];
7278 // float *p;
7280 // struct S1 {
7281 // int i;
7282 // float f[50];
7283 // }
7284 // struct S2 {
7285 // int i;
7286 // float f[50];
7287 // S1 s;
7288 // double *p;
7289 // struct S2 *ps;
7290 // int &ref;
7291 // }
7292 // S2 s;
7293 // S2 *ps;
7295 // map(d)
7296 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7298 // map(i)
7299 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7301 // map(i[1:23])
7302 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7304 // map(p)
7305 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7307 // map(p[1:24])
7308 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7309 // in unified shared memory mode or for local pointers
7310 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7312 // map(s)
7313 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7315 // map(s.i)
7316 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7318 // map(s.s.f)
7319 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7321 // map(s.p)
7322 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7324 // map(to: s.p[:22])
7325 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7326 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7327 // &(s.p), &(s.p[0]), 22*sizeof(double),
7328 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7329 // (*) alloc space for struct members, only this is a target parameter
7330 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7331 // optimizes this entry out, same in the examples below)
7332 // (***) map the pointee (map: to)
7334 // map(to: s.ref)
7335 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7336 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7337 // (*) alloc space for struct members, only this is a target parameter
7338 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7339 // optimizes this entry out, same in the examples below)
7340 // (***) map the pointee (map: to)
7342 // map(s.ps)
7343 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7345 // map(from: s.ps->s.i)
7346 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7347 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7348 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7350 // map(to: s.ps->ps)
7351 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7352 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7353 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7355 // map(s.ps->ps->ps)
7356 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7357 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7358 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7359 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7361 // map(to: s.ps->ps->s.f[:22])
7362 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7363 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7364 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7365 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7367 // map(ps)
7368 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7370 // map(ps->i)
7371 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7373 // map(ps->s.f)
7374 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7376 // map(from: ps->p)
7377 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7379 // map(to: ps->p[:22])
7380 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7381 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7382 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7384 // map(ps->ps)
7385 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7387 // map(from: ps->ps->s.i)
7388 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7389 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7390 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7392 // map(from: ps->ps->ps)
7393 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7394 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7395 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7397 // map(ps->ps->ps->ps)
7398 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7399 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7400 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7401 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7403 // map(to: ps->ps->ps->s.f[:22])
7404 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7405 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7406 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7407 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7409 // map(to: s.f[:22]) map(from: s.p[:33])
7410 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7411 // sizeof(double*) (*), TARGET_PARAM
7412 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7413 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7414 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7415 // (*) allocate contiguous space needed to fit all mapped members even if
7416 // we allocate space for members not mapped (in this example,
7417 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7418 // them as well because they fall between &s.f[0] and &s.p)
7420 // map(from: s.f[:22]) map(to: ps->p[:33])
7421 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7422 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7423 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7424 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7425 // (*) the struct this entry pertains to is the 2nd element in the list of
7426 // arguments, hence MEMBER_OF(2)
7428 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7429 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7430 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7431 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7432 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7433 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7434 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7435 // (*) the struct this entry pertains to is the 4th element in the list
7436 // of arguments, hence MEMBER_OF(4)
7438 // Track if the map information being generated is the first for a capture.
7439 bool IsCaptureFirstInfo = IsFirstComponentList;
7440 // When the variable is on a declare target link or in a to clause with
7441 // unified memory, a reference is needed to hold the host/device address
7442 // of the variable.
7443 bool RequiresReference = false;
7445 // Scan the components from the base to the complete expression.
7446 auto CI = Components.rbegin();
7447 auto CE = Components.rend();
7448 auto I = CI;
7450 // Track if the map information being generated is the first for a list of
7451 // components.
7452 bool IsExpressionFirstInfo = true;
7453 bool FirstPointerInComplexData = false;
7454 Address BP = Address::invalid();
7455 const Expr *AssocExpr = I->getAssociatedExpression();
7456 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7457 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7458 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7460 if (isa<MemberExpr>(AssocExpr)) {
7461 // The base is the 'this' pointer. The content of the pointer is going
7462 // to be the base of the field being mapped.
7463 BP = CGF.LoadCXXThisAddress();
7464 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7465 (OASE &&
7466 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7467 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7468 } else if (OAShE &&
7469 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7470 BP = Address(
7471 CGF.EmitScalarExpr(OAShE->getBase()),
7472 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7473 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7474 } else {
7475 // The base is the reference to the variable.
7476 // BP = &Var.
7477 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7478 if (const auto *VD =
7479 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7480 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7481 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7482 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7483 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7484 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7485 RequiresReference = true;
7486 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7491 // If the variable is a pointer and is being dereferenced (i.e. is not
7492 // the last component), the base has to be the pointer itself, not its
7493 // reference. References are ignored for mapping purposes.
7494 QualType Ty =
7495 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7496 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7497 // No need to generate individual map information for the pointer, it
7498 // can be associated with the combined storage if shared memory mode is
7499 // active or the base declaration is not a global variable.
7500 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7501 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7502 !VD || VD->hasLocalStorage())
7503 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7504 else
7505 FirstPointerInComplexData = true;
7506 ++I;
7510 // Track whether a component of the list should be marked as MEMBER_OF some
7511 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7512 // in a component list should be marked as MEMBER_OF; all subsequent entries
7513 // do not belong to the base struct. E.g.
7514 // struct S2 s;
7515 // s.ps->ps->ps->f[:]
7516 // (1) (2) (3) (4)
7517 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7518 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7519 // is the pointee of ps(2), which is not a member of struct s, so it should
7520 // not be marked as such (it is still PTR_AND_OBJ).
7521 // The variable is initialized to false so that PTR_AND_OBJ entries which
7522 // are not struct members are not considered (e.g. array of pointers to
7523 // data).
7524 bool ShouldBeMemberOf = false;
7526 // Variable keeping track of whether or not we have encountered a component
7527 // in the component list which is a member expression. Useful when we have a
7528 // pointer or a final array section, in which case it is the previous
7529 // component in the list which tells us whether we have a member expression.
7530 // E.g. X.f[:]
7531 // While processing the final array section "[:]" it is "f" which tells us
7532 // whether we are dealing with a member of a declared struct.
7533 const MemberExpr *EncounteredME = nullptr;
7535 // Track the total number of dimensions. Start from one for the dummy
7536 // dimension.
7537 uint64_t DimSize = 1;
7539 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7540 bool IsPrevMemberReference = false;
7542 for (; I != CE; ++I) {
7543 // If the current component is a member of a struct (parent struct), mark it.
7544 if (!EncounteredME) {
7545 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7546 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7547 // as MEMBER_OF the parent struct.
7548 if (EncounteredME) {
7549 ShouldBeMemberOf = true;
7550 // Do not emit as a complex pointer if this is actually not an
7551 // array-like expression.
7552 if (FirstPointerInComplexData) {
7553 QualType Ty = std::prev(I)
7554 ->getAssociatedDeclaration()
7555 ->getType()
7556 .getNonReferenceType();
7557 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7558 FirstPointerInComplexData = false;
7563 auto Next = std::next(I);
7565 // We need to generate the addresses and sizes if this is the last
7566 // component, if the component is a pointer or if it is an array section
7567 // whose length can't be proved to be one. If this is a pointer, it
7568 // becomes the base address for the following components.
7570 // A final array section is one whose length can't be proved to be one.
7571 // If the map item is non-contiguous then we don't treat any array section
7572 // as a final array section.
7573 bool IsFinalArraySection =
7574 !IsNonContiguous &&
7575 isFinalArraySectionExpression(I->getAssociatedExpression());
7577 // If we have a declaration for the mapping use that, otherwise use
7578 // the base declaration of the map clause.
7579 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7580 ? I->getAssociatedDeclaration()
7581 : BaseDecl;
7582 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7583 : MapExpr;
7585 // Get information on whether the element is a pointer. Have to do a
7586 // special treatment for array sections given that they are built-in
7587 // types.
7588 const auto *OASE =
7589 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7590 const auto *OAShE =
7591 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7592 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7593 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7594 bool IsPointer =
7595 OAShE ||
7596 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7597 .getCanonicalType()
7598 ->isAnyPointerType()) ||
7599 I->getAssociatedExpression()->getType()->isAnyPointerType();
7600 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7601 MapDecl &&
7602 MapDecl->getType()->isLValueReferenceType();
7603 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7605 if (OASE)
7606 ++DimSize;
7608 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7609 IsFinalArraySection) {
7610 // If this is not the last component, we expect the pointer to be
7611 // associated with an array expression or member expression.
7612 assert((Next == CE ||
7613 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7614 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7615 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7616 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7617 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7618 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7619 "Unexpected expression");
7621 Address LB = Address::invalid();
7622 Address LowestElem = Address::invalid();
7623 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7624 const MemberExpr *E) {
7625 const Expr *BaseExpr = E->getBase();
7626 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7627 // scalar.
7628 LValue BaseLV;
7629 if (E->isArrow()) {
7630 LValueBaseInfo BaseInfo;
7631 TBAAAccessInfo TBAAInfo;
7632 Address Addr =
7633 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7634 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7635 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7636 } else {
7637 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7639 return BaseLV;
7640 };
7641 if (OAShE) {
7642 LowestElem = LB =
7643 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7644 CGF.ConvertTypeForMem(
7645 OAShE->getBase()->getType()->getPointeeType()),
7646 CGF.getContext().getTypeAlignInChars(
7647 OAShE->getBase()->getType()));
7648 } else if (IsMemberReference) {
7649 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7650 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7651 LowestElem = CGF.EmitLValueForFieldInitialization(
7652 BaseLVal, cast<FieldDecl>(MapDecl))
7653 .getAddress(CGF);
7654 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7655 .getAddress(CGF);
7656 } else {
7657 LowestElem = LB =
7658 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7659 .getAddress(CGF);
7662 // If this component is a pointer inside the base struct then we don't
7663 // need to create any entry for it - it will be combined with the object
7664 // it is pointing to into a single PTR_AND_OBJ entry.
7665 bool IsMemberPointerOrAddr =
7666 EncounteredME &&
7667 (((IsPointer || ForDeviceAddr) &&
7668 I->getAssociatedExpression() == EncounteredME) ||
7669 (IsPrevMemberReference && !IsPointer) ||
7670 (IsMemberReference && Next != CE &&
7671 !Next->getAssociatedExpression()->getType()->isPointerType()));
7672 if (!OverlappedElements.empty() && Next == CE) {
7673 // Handle base element with the info for overlapped elements.
7674 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7675 assert(!IsPointer &&
7676 "Unexpected base element with the pointer type.");
7677 // Mark the whole struct as the struct that requires allocation on the
7678 // device.
7679 PartialStruct.LowestElem = {0, LowestElem};
7680 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7681 I->getAssociatedExpression()->getType());
7682 Address HB = CGF.Builder.CreateConstGEP(
7683 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7684 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7685 TypeSize.getQuantity() - 1);
7686 PartialStruct.HighestElem = {
7687 std::numeric_limits<decltype(
7688 PartialStruct.HighestElem.first)>::max(),
7689 HB};
7690 PartialStruct.Base = BP;
7691 PartialStruct.LB = LB;
7692 assert(
7693 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7694 "Overlapped elements must be used only once for the variable.");
7695 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7696 // Emit data for non-overlapped data.
7697 OpenMPOffloadMappingFlags Flags =
7698 OMP_MAP_MEMBER_OF |
7699 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7700 /*AddPtrFlag=*/false,
7701 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7702 llvm::Value *Size = nullptr;
7703 // Do bitcopy of all non-overlapped structure elements.
7704 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7705 Component : OverlappedElements) {
7706 Address ComponentLB = Address::invalid();
7707 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7708 Component) {
7709 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7710 const auto *FD = dyn_cast<FieldDecl>(VD);
7711 if (FD && FD->getType()->isLValueReferenceType()) {
7712 const auto *ME =
7713 cast<MemberExpr>(MC.getAssociatedExpression());
7714 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7715 ComponentLB =
7716 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7717 .getAddress(CGF);
7718 } else {
7719 ComponentLB =
7720 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7721 .getAddress(CGF);
7723 Size = CGF.Builder.CreatePtrDiff(
7724 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7725 CGF.EmitCastToVoidPtr(LB.getPointer()));
7726 break;
7729 assert(Size && "Failed to determine structure size");
7730 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7731 CombinedInfo.BasePointers.push_back(BP.getPointer());
7732 CombinedInfo.Pointers.push_back(LB.getPointer());
7733 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7734 Size, CGF.Int64Ty, /*isSigned=*/true));
7735 CombinedInfo.Types.push_back(Flags);
7736 CombinedInfo.Mappers.push_back(nullptr);
7737 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7738 : 1);
7739 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7741 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7742 CombinedInfo.BasePointers.push_back(BP.getPointer());
7743 CombinedInfo.Pointers.push_back(LB.getPointer());
7744 Size = CGF.Builder.CreatePtrDiff(
7745 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7746 CGF.EmitCastToVoidPtr(LB.getPointer()));
7747 CombinedInfo.Sizes.push_back(
7748 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7749 CombinedInfo.Types.push_back(Flags);
7750 CombinedInfo.Mappers.push_back(nullptr);
7751 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7752 : 1);
7753 break;
7755 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7756 if (!IsMemberPointerOrAddr ||
7757 (Next == CE && MapType != OMPC_MAP_unknown)) {
7758 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7759 CombinedInfo.BasePointers.push_back(BP.getPointer());
7760 CombinedInfo.Pointers.push_back(LB.getPointer());
7761 CombinedInfo.Sizes.push_back(
7762 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7763 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7764 : 1);
7766 // If Mapper is valid, the last component inherits the mapper.
7767 bool HasMapper = Mapper && Next == CE;
7768 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7770 // We need to add a pointer flag for each map that comes from the
7771 // same expression except for the first one. We also need to signal
7772 // this map is the first one that relates with the current capture
7773 // (there is a set of entries for each capture).
7774 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7775 MapType, MapModifiers, MotionModifiers, IsImplicit,
7776 !IsExpressionFirstInfo || RequiresReference ||
7777 FirstPointerInComplexData || IsMemberReference,
7778 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7780 if (!IsExpressionFirstInfo || IsMemberReference) {
7781 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7782 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7783 if (IsPointer || (IsMemberReference && Next != CE))
7784 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7785 OMP_MAP_DELETE | OMP_MAP_CLOSE);
7787 if (ShouldBeMemberOf) {
7788 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7789 // should be later updated with the correct value of MEMBER_OF.
7790 Flags |= OMP_MAP_MEMBER_OF;
7791 // From now on, all subsequent PTR_AND_OBJ entries should not be
7792 // marked as MEMBER_OF.
7793 ShouldBeMemberOf = false;
7797 CombinedInfo.Types.push_back(Flags);
7800 // If we have encountered a member expression so far, keep track of the
7801 // mapped member. If the parent is "*this", then the value declaration
7802 // is nullptr.
7803 if (EncounteredME) {
7804 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7805 unsigned FieldIndex = FD->getFieldIndex();
7807 // Update info about the lowest and highest elements for this struct
7808 if (!PartialStruct.Base.isValid()) {
7809 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7810 if (IsFinalArraySection) {
7811 Address HB =
7812 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7813 .getAddress(CGF);
7814 PartialStruct.HighestElem = {FieldIndex, HB};
7815 } else {
7816 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7818 PartialStruct.Base = BP;
7819 PartialStruct.LB = BP;
7820 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7821 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7822 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7823 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7827 // Need to emit combined struct for array sections.
7828 if (IsFinalArraySection || IsNonContiguous)
7829 PartialStruct.IsArraySection = true;
7831 // If we have a final array section, we are done with this expression.
7832 if (IsFinalArraySection)
7833 break;
7835 // The pointer becomes the base for the next element.
7836 if (Next != CE)
7837 BP = IsMemberReference ? LowestElem : LB;
7839 IsExpressionFirstInfo = false;
7840 IsCaptureFirstInfo = false;
7841 FirstPointerInComplexData = false;
7842 IsPrevMemberReference = IsMemberReference;
7843 } else if (FirstPointerInComplexData) {
7844 QualType Ty = Components.rbegin()
7845 ->getAssociatedDeclaration()
7846 ->getType()
7847 .getNonReferenceType();
7848 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7849 FirstPointerInComplexData = false;
7852 // If we ran into the whole component, allocate space for the whole
7853 // record.
7854 if (!EncounteredME)
7855 PartialStruct.HasCompleteRecord = true;
7857 if (!IsNonContiguous)
7858 return;
7860 const ASTContext &Context = CGF.getContext();
7862 // To support strides in array sections, we need to initialize the first
7863 // dimension size as 1, the first offset as 0, and the first count as 1.
7864 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7865 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7866 MapValuesArrayTy CurStrides;
7867 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7868 uint64_t ElementTypeSize;
7870 // Collect size information for each dimension and get the element size as
7871 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7872 // should be [10, 10] and the first stride is 4 bytes.
7873 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7874 Components) {
7875 const Expr *AssocExpr = Component.getAssociatedExpression();
7876 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7878 if (!OASE)
7879 continue;
7881 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7882 auto *CAT = Context.getAsConstantArrayType(Ty);
7883 auto *VAT = Context.getAsVariableArrayType(Ty);
7885 // We need all the dimension sizes except for the last dimension.
7886 assert((VAT || CAT || &Component == &*Components.begin()) &&
7887 "Should be either ConstantArray or VariableArray if not the "
7888 "first Component");
7890 // Get element size if CurStrides is empty.
7891 if (CurStrides.empty()) {
7892 const Type *ElementType = nullptr;
7893 if (CAT)
7894 ElementType = CAT->getElementType().getTypePtr();
7895 else if (VAT)
7896 ElementType = VAT->getElementType().getTypePtr();
7897 else
7898 assert(&Component == &*Components.begin() &&
7899 "Only expect pointer (non CAT or VAT) when this is the "
7900 "first Component");
7901 // If ElementType is null, then it means the base is a pointer
7902 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7903 // for the next iteration.
7904 if (ElementType) {
7905 // When the base is a pointer, we need to remove one level of
7906 // indirection.
7907 if (&Component != &*Components.begin())
7908 ElementType = ElementType->getPointeeOrArrayElementType();
7909 ElementTypeSize =
7910 Context.getTypeSizeInChars(ElementType).getQuantity();
7911 CurStrides.push_back(
7912 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7915 // Get each dimension's value, except for the last dimension, since we
7916 // don't need it.
7917 if (DimSizes.size() < Components.size() - 1) {
7918 if (CAT)
7919 DimSizes.push_back(llvm::ConstantInt::get(
7920 CGF.Int64Ty, CAT->getSize().getZExtValue()));
7921 else if (VAT)
7922 DimSizes.push_back(CGF.Builder.CreateIntCast(
7923 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7924 /*IsSigned=*/false));
7928 // Skip the dummy dimension since we already have its information.
7929 auto *DI = DimSizes.begin() + 1;
7930 // Running product of dimension sizes.
7931 llvm::Value *DimProd =
7932 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7934 // Collect info for non-contiguous maps. Notice that offset, count, and
7935 // stride are only meaningful for an array section, so we insert a null for
7936 // anything other than an array section.
7937 // Also, the sizes of the offset, count, and stride lists are not the same
7938 // as those of pointers, base_pointers, sizes, or dims. Instead, they match
7939 // the number of non-contiguous declarations in the target update to/from
7940 // clause.
7941 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7942 Components) {
7943 const Expr *AssocExpr = Component.getAssociatedExpression();
7945 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7946 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7947 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7948 /*isSigned=*/false);
7949 CurOffsets.push_back(Offset);
7950 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7951 CurStrides.push_back(CurStrides.back());
7952 continue;
7955 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7957 if (!OASE)
7958 continue;
7960 // Offset
7961 const Expr *OffsetExpr = OASE->getLowerBound();
7962 llvm::Value *Offset = nullptr;
7963 if (!OffsetExpr) {
7964 // If offset is absent, then we just set it to zero.
7965 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7966 } else {
7967 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7968 CGF.Int64Ty,
7969 /*isSigned=*/false);
7971 CurOffsets.push_back(Offset);
7973 // Count
7974 const Expr *CountExpr = OASE->getLength();
7975 llvm::Value *Count = nullptr;
7976 if (!CountExpr) {
7977 // In Clang, once a high dimension is an array section, we construct all
7978 // the lower dimension as array section, however, for case like
7979 // arr[0:2][2], Clang construct the inner dimension as an array section
7980 // but it actually is not in an array section form according to spec.
7981 if (!OASE->getColonLocFirst().isValid() &&
7982 !OASE->getColonLocSecond().isValid()) {
7983 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7984 } else {
7985 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7986 // When the length is absent it defaults to ⌈(size −
7987 // lower-bound)/stride⌉, where size is the size of the array
7988 // dimension.
7989 const Expr *StrideExpr = OASE->getStride();
7990 llvm::Value *Stride =
7991 StrideExpr
7992 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7993 CGF.Int64Ty, /*isSigned=*/false)
7994 : nullptr;
7995 if (Stride)
7996 Count = CGF.Builder.CreateUDiv(
7997 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7998 else
7999 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8001 } else {
8002 Count = CGF.EmitScalarExpr(CountExpr);
8004 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8005 CurCounts.push_back(Count);
8007 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8008 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8009 //      Offset  Count  Stride
8010 //  D0       0      1       4  (int)  <- dummy dimension
8011 //  D1       0      2       8  (2 * (1) * 4)
8012 //  D2       1      2      20  (1 * (1 * 5) * 4)
8013 //  D3       0      2     200  (2 * (1 * 5 * 5) * 4)
8014 const Expr *StrideExpr = OASE->getStride();
8015 llvm::Value *Stride =
8016 StrideExpr
8017 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8018 CGF.Int64Ty, /*isSigned=*/false)
8019 : nullptr;
8020 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8021 if (Stride)
8022 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8023 else
8024 CurStrides.push_back(DimProd);
8025 if (DI != DimSizes.end())
8026 ++DI;
8029 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8030 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8031 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
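//
// For the arr[0:2:2][1:2:1][0:2:2] example in the table above, this emits
// CurOffsets = {0, 0, 1, 0}, CurCounts = {1, 2, 2, 2} and
// CurStrides = {4, 8, 20, 200}: one dummy dimension followed by one
// offset/count/stride triple per array-section dimension.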
8034 /// Return the adjusted map modifiers if the declaration a capture refers to
8035 /// appears in a first-private clause. This is expected to be used only with
8036 /// directives that start with 'target'.
8037 MappableExprsHandler::OpenMPOffloadMappingFlags
8038 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8039 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8041 // A first private variable captured by reference will use only the
8042 // 'private ptr' and 'map to' flags. Return the right flags if the captured
8043 // declaration is known as first-private in this handler.
8044 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8045 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8046 return MappableExprsHandler::OMP_MAP_TO |
8047 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8048 return MappableExprsHandler::OMP_MAP_PRIVATE |
8049 MappableExprsHandler::OMP_MAP_TO;
8051 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8052 if (I != LambdasMap.end())
8053 // For map(to: lambda): use the user-specified map type.
8054 return getMapTypeBits(
8055 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8056 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8057 /*AddPtrFlag=*/false,
8058 /*AddIsTargetParamFlag=*/false,
8059 /*isNonContiguous=*/false);
8060 return MappableExprsHandler::OMP_MAP_TO |
8061 MappableExprsHandler::OMP_MAP_FROM;
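//
// Example (sketch): a firstprivate `int *p` captured by a target region
// gets TO | PTR_AND_OBJ; a firstprivate `int x` gets PRIVATE | TO; a lambda
// in map(to:) reuses its user-specified map type; everything else falls
// back to TO | FROM.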
8064 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8065 // Shift (Position + 1) left by getFlagMemberOffset() bits.
8066 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8067 << getFlagMemberOffset());
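//
// Sketch, assuming MEMBER_OF occupies the 16 most significant bits of the
// 64-bit flag word: getMemberOfFlag(0) shifts 1 into that field, producing
// the MEMBER_OF(1) notation used in the mapping examples above; Position is
// the zero-based index of the parent entry in the argument list.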
8070 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8071 OpenMPOffloadMappingFlags MemberOfFlag) {
8072 // If the entry is PTR_AND_OBJ but has not been marked with the special
8073 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8074 // marked as MEMBER_OF.
8075 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8076 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8077 return;
8079 // Reset the placeholder value to prepare the flag for the assignment of the
8080 // proper MEMBER_OF value.
8081 Flags &= ~OMP_MAP_MEMBER_OF;
8082 Flags |= MemberOfFlag;
8085 void getPlainLayout(const CXXRecordDecl *RD,
8086 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8087 bool AsBase) const {
8088 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8090 llvm::StructType *St =
8091 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8093 unsigned NumElements = St->getNumElements();
8094 llvm::SmallVector<
8095 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8096 RecordLayout(NumElements);
8098 // Fill bases.
8099 for (const auto &I : RD->bases()) {
8100 if (I.isVirtual())
8101 continue;
8102 const auto *Base = I.getType()->getAsCXXRecordDecl();
8103 // Ignore empty bases.
8104 if (Base->isEmpty() || CGF.getContext()
8105 .getASTRecordLayout(Base)
8106 .getNonVirtualSize()
8107 .isZero())
8108 continue;
8110 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8111 RecordLayout[FieldIndex] = Base;
8113 // Fill in virtual bases.
8114 for (const auto &I : RD->vbases()) {
8115 const auto *Base = I.getType()->getAsCXXRecordDecl();
8116 // Ignore empty bases.
8117 if (Base->isEmpty())
8118 continue;
8119 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8120 if (RecordLayout[FieldIndex])
8121 continue;
8122 RecordLayout[FieldIndex] = Base;
8124 // Fill in all the fields.
8125 assert(!RD->isUnion() && "Unexpected union.");
8126 for (const auto *Field : RD->fields()) {
8127 // Fill in non-bitfields only. (Bitfields and zero-size fields are
8128 // skipped; they do not appear in the plain layout.)
8129 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8130 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8131 RecordLayout[FieldIndex] = Field;
8134 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8135 &Data : RecordLayout) {
8136 if (Data.isNull())
8137 continue;
8138 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8139 getPlainLayout(Base, Layout, /*AsBase=*/true);
8140 else
8141 Layout.push_back(Data.get<const FieldDecl *>());
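//
// Example (sketch): for `struct B { int b; }; struct D : B { int d; };`
// getPlainLayout(D, Layout, /*AsBase=*/false) yields {B::b, D::d}; base
// subobject fields are flattened first via the recursive call, with empty
// bases, zero-size fields, and bitfields skipped.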
8145 /// Generate all the base pointers, section pointers, sizes, map types, and
8146 /// mappers for the extracted mappable expressions (all included in \a
8147 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8148 /// pair of the relevant declaration and index where it occurs is appended to
8149 /// the device pointers info array.
8150 void generateAllInfoForClauses(
8151 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8152 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8153 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8154 // We have to process the component lists that relate to the same
8155 // declaration in a single chunk so that we can generate the map flags
8156 // correctly. Therefore, we organize all lists in a map.
8157 enum MapKind { Present, Allocs, Other, Total };
8158 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8159 SmallVector<SmallVector<MapInfo, 8>, 4>>
8160 Info;
8162 // Helper function to fill the information map for the different supported
8163 // clauses.
8164 auto &&InfoGen =
8165 [&Info, &SkipVarSet](
8166 const ValueDecl *D, MapKind Kind,
8167 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8168 OpenMPMapClauseKind MapType,
8169 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8170 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8171 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8172 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8173 if (SkipVarSet.contains(D))
8174 return;
8175 auto It = Info.find(D);
8176 if (It == Info.end())
8177 It = Info
8178 .insert(std::make_pair(
8179 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8180 .first;
8181 It->second[Kind].emplace_back(
8182 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8183 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8186 for (const auto *Cl : Clauses) {
8187 const auto *C = dyn_cast<OMPMapClause>(Cl);
8188 if (!C)
8189 continue;
8190 MapKind Kind = Other;
8191 if (llvm::is_contained(C->getMapTypeModifiers(),
8192 OMPC_MAP_MODIFIER_present))
8193 Kind = Present;
8194 else if (C->getMapType() == OMPC_MAP_alloc)
8195 Kind = Allocs;
8196 const auto *EI = C->getVarRefs().begin();
8197 for (const auto L : C->component_lists()) {
8198 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8199 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8200 C->getMapTypeModifiers(), llvm::None,
8201 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8202 *EI);
8203 ++EI;
8206 for (const auto *Cl : Clauses) {
8207 const auto *C = dyn_cast<OMPToClause>(Cl);
8208 if (!C)
8209 continue;
8210 MapKind Kind = Other;
8211 if (llvm::is_contained(C->getMotionModifiers(),
8212 OMPC_MOTION_MODIFIER_present))
8213 Kind = Present;
8214 const auto *EI = C->getVarRefs().begin();
8215 for (const auto L : C->component_lists()) {
8216 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8217 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8218 C->isImplicit(), std::get<2>(L), *EI);
8219 ++EI;
8222 for (const auto *Cl : Clauses) {
8223 const auto *C = dyn_cast<OMPFromClause>(Cl);
8224 if (!C)
8225 continue;
8226 MapKind Kind = Other;
8227 if (llvm::is_contained(C->getMotionModifiers(),
8228 OMPC_MOTION_MODIFIER_present))
8229 Kind = Present;
8230 const auto *EI = C->getVarRefs().begin();
8231 for (const auto L : C->component_lists()) {
8232 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8233 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8234 C->isImplicit(), std::get<2>(L), *EI);
8235 ++EI;
8239 // Look at the use_device_ptr and use_device_addr clauses information and
8240 // mark the existing map entries as such. If there is no map information for
8241 // an entry in the use_device_ptr and use_device_addr list, we create one
8242 // with map type 'alloc' and zero size section. It is the user's fault if that
8243 // was not mapped before. If there is no map information and the pointer is
8244 // a struct member, then we defer the emission of that entry until the whole
8245 // struct has been processed.
8246 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8247 SmallVector<DeferredDevicePtrEntryTy, 4>>
8248 DeferredInfo;
8249 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8251 auto &&UseDeviceDataCombinedInfoGen =
8252 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8253 CodeGenFunction &CGF) {
8254 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8255 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8256 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8257 UseDeviceDataCombinedInfo.Sizes.push_back(
8258 llvm::Constant::getNullValue(CGF.Int64Ty));
8259 UseDeviceDataCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8260 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8263 auto &&MapInfoGen =
8264 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8265 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8266 OMPClauseMappableExprCommon::MappableExprComponentListRef
8267 Components,
8268 bool IsImplicit, bool IsDevAddr) {
8269 // We didn't find any match in our map information; generate a zero
8270 // size array section. If the pointer is a struct member, we defer
8271 // this action until the whole struct has been processed.
8272 if (isa<MemberExpr>(IE)) {
8273 // Insert the pointer into Info to be processed by
8274 // generateInfoForComponentList. Because it is a member pointer
8275 // without a pointee, no entry will be generated for it; therefore
8276 // we need to generate one after the whole struct has been
8277 // processed. Nonetheless, generateInfoForComponentList must be
8278 // called to take the pointer into account for the calculation of
8279 // the range of the partial struct.
8280 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8281 llvm::None, /*ReturnDevicePointer=*/false, IsImplicit,
8282 nullptr, nullptr, IsDevAddr);
8283 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8284 } else {
8285 llvm::Value *Ptr;
8286 if (IsDevAddr) {
8287 if (IE->isGLValue())
8288 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8289 else
8290 Ptr = CGF.EmitScalarExpr(IE);
8291 } else {
8292 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8294 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
8298 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8299 const Expr *IE, bool IsDevAddr) -> bool {
8300 // We potentially have map information for this declaration already.
8301 // Look for the first set of components that refer to it. If found,
8302 // return true.
8303 // If the first component is a member expression, we have to look into
8304 // 'this', which maps to null in the map of map information. Otherwise
8305 // look directly for the information.
8306 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8307 if (It != Info.end()) {
8308 bool Found = false;
8309 for (auto &Data : It->second) {
8310 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8311 return MI.Components.back().getAssociatedDeclaration() == VD;
8312 });
8313 // If we found a map entry, signal that the pointer has to be
8314 // returned and move on to the next declaration. Exclude cases where
8315 // the base pointer is mapped as array subscript, array section or
8316 // array shaping. The base address is passed as a pointer to base in
8317 // this case and cannot be used as a base for use_device_ptr list
8318 // item.
8319 if (CI != Data.end()) {
8320 if (IsDevAddr) {
8321 CI->ReturnDevicePointer = true;
8322 Found = true;
8323 break;
8324 } else {
8325 auto PrevCI = std::next(CI->Components.rbegin());
8326 const auto *VarD = dyn_cast<VarDecl>(VD);
8327 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8328 isa<MemberExpr>(IE) ||
8329 !VD->getType().getNonReferenceType()->isPointerType() ||
8330 PrevCI == CI->Components.rend() ||
8331 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8332 VarD->hasLocalStorage()) {
8333 CI->ReturnDevicePointer = true;
8334 Found = true;
8335 break;
8340 return Found;
8342 return false;
8345 // Look at the use_device_ptr clause information and mark the existing map
8346 // entries as such. If there is no map information for an entry in the
8347 // use_device_ptr list, we create one with map type 'alloc' and zero size
8348 // section. It is the user's fault if that was not mapped before. If there is
8349 // no map information and the pointer is a struct member, then we defer the
8350 // emission of that entry until the whole struct has been processed.
8351 for (const auto *Cl : Clauses) {
8352 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8353 if (!C)
8354 continue;
8355 for (const auto L : C->component_lists()) {
8356 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8357 std::get<1>(L);
8358 assert(!Components.empty() &&
8359 "Not expecting empty list of components!");
8360 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8361 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8362 const Expr *IE = Components.back().getAssociatedExpression();
8363 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8364 continue;
8365 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8366 /*IsDevAddr=*/false);
8370 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8371 for (const auto *Cl : Clauses) {
8372 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8373 if (!C)
8374 continue;
8375 for (const auto L : C->component_lists()) {
8376 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8377 std::get<1>(L);
8378 assert(!std::get<1>(L).empty() &&
8379 "Not expecting empty list of components!");
8380 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8381 if (!Processed.insert(VD).second)
8382 continue;
8383 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8384 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8385 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8386 continue;
8387 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8388 /*IsDevAddr=*/true);
8392 for (const auto &Data : Info) {
8393 StructRangeInfoTy PartialStruct;
8394 // Temporary generated information.
8395 MapCombinedInfoTy CurInfo;
8396 const Decl *D = Data.first;
8397 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8398 for (const auto &M : Data.second) {
8399 for (const MapInfo &L : M) {
8400 assert(!L.Components.empty() &&
8401 "Not expecting declaration with no component lists.");
8403 // Remember the current base pointer index.
8404 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8405 CurInfo.NonContigInfo.IsNonContiguous =
8406 L.Components.back().isNonContiguous();
8407 generateInfoForComponentList(
8408 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8409 CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8410 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8412 // If this entry relates with a device pointer, set the relevant
8413 // declaration and add the 'return pointer' flag.
8414 if (L.ReturnDevicePointer) {
8415 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8416 "Unexpected number of mapped base pointers.");
8418 const ValueDecl *RelevantVD =
8419 L.Components.back().getAssociatedDeclaration();
8420 assert(RelevantVD &&
8421 "No relevant declaration related with device pointer??");
8423 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8424 RelevantVD);
8425 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8430 // Append any pending zero-length pointers which are struct members and
8431 // are used with use_device_ptr or use_device_addr.
8432 auto CI = DeferredInfo.find(Data.first);
8433 if (CI != DeferredInfo.end()) {
8434 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8435 llvm::Value *BasePtr;
8436 llvm::Value *Ptr;
8437 if (L.ForDeviceAddr) {
8438 if (L.IE->isGLValue())
8439 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8440 else
8441 Ptr = this->CGF.EmitScalarExpr(L.IE);
8442 BasePtr = Ptr;
8443 // Entry is RETURN_PARAM. Also, set the placeholder value
8444 // MEMBER_OF=FFFF so that the entry is later updated with the
8445 // correct value of MEMBER_OF.
8446 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8447 } else {
8448 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8449 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8450 L.IE->getExprLoc());
8451 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8452 // placeholder value MEMBER_OF=FFFF so that the entry is later
8453 // updated with the correct value of MEMBER_OF.
8454 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8455 OMP_MAP_MEMBER_OF);
8457 CurInfo.Exprs.push_back(L.VD);
8458 CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8459 CurInfo.Pointers.push_back(Ptr);
8460 CurInfo.Sizes.push_back(
8461 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8462 CurInfo.Mappers.push_back(nullptr);
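          // Illustrative example (editor's sketch, not part of the original
          // source): for
          //
          //   struct S { int *p; } s;
          //   #pragma omp target data map(tofrom: s) use_device_ptr(s.p)
          //
          // the struct member s.p cannot get its RETURN_PARAM entry while the
          // members of 's' are being emitted, so it lands here as a deferred
          // PTR_AND_OBJ | RETURN_PARAM entry with a zero size and a
          // MEMBER_OF=FFFF placeholder that is patched once the combined
          // struct entry exists.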
8465 // If there is an entry in PartialStruct it means we have a struct with
8466 // individual members mapped. Emit an extra combined entry.
8467 if (PartialStruct.Base.isValid()) {
8468 CurInfo.NonContigInfo.Dims.push_back(0);
8469 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8472 // We need to append the results of this capture to what we already
8473 // have.
8474 CombinedInfo.append(CurInfo);
8476 // Append data for use_device_ptr clauses.
8477 CombinedInfo.append(UseDeviceDataCombinedInfo);
8480 public:
8481 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8482 : CurDir(&Dir), CGF(CGF) {
8483 // Extract firstprivate clause information.
8484 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8485 for (const auto *D : C->varlists())
8486 FirstPrivateDecls.try_emplace(
8487 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8488 // Extract implicit firstprivates from uses_allocators clauses.
8489 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8490 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8491 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8492 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8493 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8494 /*Implicit=*/true);
8495 else if (const auto *VD = dyn_cast<VarDecl>(
8496 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8497 ->getDecl()))
8498 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
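        // Illustrative example (editor's sketch): given
        //
        //   omp_allocator_handle_t MyAlloc;
        //   #pragma omp target uses_allocators(MyAlloc(MyTraits))
        //
        // the traits variable MyTraits (or, when no traits are specified, the
        // allocator variable itself) is recorded as an implicit firstprivate
        // for the target region.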
8501 // Extract device pointer clause information.
8502 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8503 for (auto L : C->component_lists())
8504 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8505 // Extract device addr clause information.
8506 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8507 for (auto L : C->component_lists())
8508 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8509 // Extract map information.
8510 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8511 if (C->getMapType() != OMPC_MAP_to)
8512 continue;
8513 for (auto L : C->component_lists()) {
8514 const ValueDecl *VD = std::get<0>(L);
8515 const auto *RD = VD ? VD->getType()
8516 .getCanonicalType()
8517 .getNonReferenceType()
8518 ->getAsCXXRecordDecl()
8519 : nullptr;
8520 if (RD && RD->isLambda())
8521 LambdasMap.try_emplace(std::get<0>(L), C);
8526 /// Constructor for the declare mapper directive.
8527 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8528 : CurDir(&Dir), CGF(CGF) {}
8530 /// Generate code for the combined entry if we have a partially mapped struct
8531 /// and take care of the mapping flags of the arguments corresponding to
8532 /// individual struct members.
8533 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8534 MapFlagsArrayTy &CurTypes,
8535 const StructRangeInfoTy &PartialStruct,
8536 const ValueDecl *VD = nullptr,
8537 bool NotTargetParams = true) const {
8538 if (CurTypes.size() == 1 &&
8539 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8540 !PartialStruct.IsArraySection)
8541 return;
8542 Address LBAddr = PartialStruct.LowestElem.second;
8543 Address HBAddr = PartialStruct.HighestElem.second;
8544 if (PartialStruct.HasCompleteRecord) {
8545 LBAddr = PartialStruct.LB;
8546 HBAddr = PartialStruct.LB;
8548 CombinedInfo.Exprs.push_back(VD);
8549 // Base is the base of the struct
8550 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8551 // Pointer is the address of the lowest element
8552 llvm::Value *LB = LBAddr.getPointer();
8553 CombinedInfo.Pointers.push_back(LB);
8554 // There should not be a mapper for a combined entry.
8555 CombinedInfo.Mappers.push_back(nullptr);
8556 // Size is (addr of {highest+1} element) - (addr of lowest element)
8557 llvm::Value *HB = HBAddr.getPointer();
8558 llvm::Value *HAddr =
8559 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8560 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8561 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8562 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8563 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8564 /*isSigned=*/false);
8565 CombinedInfo.Sizes.push_back(Size);
8566      // The map type is always TARGET_PARAM when generating info for captures.
8567 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8568 : OMP_MAP_TARGET_PARAM);
8569 // If any element has the present modifier, then make sure the runtime
8570 // doesn't attempt to allocate the struct.
8571 if (CurTypes.end() !=
8572 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8573 return Type & OMP_MAP_PRESENT;
8575 CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8576 // Remove TARGET_PARAM flag from the first element
8577 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8578 // If any element has the ompx_hold modifier, then make sure the runtime
8579 // uses the hold reference count for the struct as a whole so that it won't
8580 // be unmapped by an extra dynamic reference count decrement. Add it to all
8581 // elements as well so the runtime knows which reference count to check
8582 // when determining whether it's time for device-to-host transfers of
8583 // individual elements.
8584 if (CurTypes.end() !=
8585 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8586 return Type & OMP_MAP_OMPX_HOLD;
8587 })) {
8588 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8589 for (auto &M : CurTypes)
8590 M |= OMP_MAP_OMPX_HOLD;
8593 // All other current entries will be MEMBER_OF the combined entry
8594 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8595 // 0xFFFF in the MEMBER_OF field).
8596 OpenMPOffloadMappingFlags MemberOfFlag =
8597 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8598 for (auto &M : CurTypes)
8599 setCorrectMemberOfFlag(M, MemberOfFlag);
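  // Illustrative example (editor's sketch, not part of the original source):
  // for
  //
  //   struct S { int a; int b; int c; } s;
  //   #pragma omp target map(tofrom: s.a, s.c)
  //
  // emitCombinedEntry adds one entry whose base is &s, whose pointer is &s.a
  // (the lowest mapped element), and whose size is
  // (char *)(&s.c + 1) - (char *)&s.a; the individual member entries are then
  // rewritten to be MEMBER_OF this combined entry.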
8602 /// Generate all the base pointers, section pointers, sizes, map types, and
8603 /// mappers for the extracted mappable expressions (all included in \a
8604   /// CombinedInfo). Also, for each item that relates to a device pointer, a
8605 /// pair of the relevant declaration and index where it occurs is appended to
8606 /// the device pointers info array.
8607 void generateAllInfo(
8608 MapCombinedInfoTy &CombinedInfo,
8609 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8610 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8611 assert(CurDir.is<const OMPExecutableDirective *>() &&
8612            "Expect an executable directive");
8613 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8614 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8617 /// Generate all the base pointers, section pointers, sizes, map types, and
8618   /// mappers for the extracted map clauses of a user-defined mapper (all included
8619 /// in \a CombinedInfo).
8620 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8621 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8622 "Expect a declare mapper directive");
8623 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8624 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
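  // Illustrative example (editor's sketch): generateAllInfoForMapper processes
  // the map clauses of a declaration such as
  //
  //   struct Vec { int len; double *data; };
  //   #pragma omp declare mapper(Vec v) map(v.len) map(v.data[0:v.len])
  //
  // where the two map clauses provide the component lists that end up in
  // \a CombinedInfo for the generated mapper function.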
8627 /// Emit capture info for lambdas for variables captured by reference.
8628 void generateInfoForLambdaCaptures(
8629 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8630 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8631 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8632 const auto *RD = VDType->getAsCXXRecordDecl();
8633 if (!RD || !RD->isLambda())
8634 return;
8635 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8636 CGF.getContext().getDeclAlign(VD));
8637 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8638 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8639 FieldDecl *ThisCapture = nullptr;
8640 RD->getCaptureFields(Captures, ThisCapture);
8641 if (ThisCapture) {
8642 LValue ThisLVal =
8643 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8644 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8645 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8646 VDLVal.getPointer(CGF));
8647 CombinedInfo.Exprs.push_back(VD);
8648 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8649 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8650 CombinedInfo.Sizes.push_back(
8651 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8652 CGF.Int64Ty, /*isSigned=*/true));
8653 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8654 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8655 CombinedInfo.Mappers.push_back(nullptr);
8657 for (const LambdaCapture &LC : RD->captures()) {
8658 if (!LC.capturesVariable())
8659 continue;
8660 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8661 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8662 continue;
8663 auto It = Captures.find(VD);
8664 assert(It != Captures.end() && "Found lambda capture without field.");
8665 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8666 if (LC.getCaptureKind() == LCK_ByRef) {
8667 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8668 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8669 VDLVal.getPointer(CGF));
8670 CombinedInfo.Exprs.push_back(VD);
8671 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8672 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8673 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8674 CGF.getTypeSize(
8675 VD->getType().getCanonicalType().getNonReferenceType()),
8676 CGF.Int64Ty, /*isSigned=*/true));
8677 } else {
8678 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8679 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8680 VDLVal.getPointer(CGF));
8681 CombinedInfo.Exprs.push_back(VD);
8682 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8683 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8684 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8686 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8687 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8688 CombinedInfo.Mappers.push_back(nullptr);
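  // Illustrative example (editor's sketch, not part of the original source):
  // for
  //
  //   int x = 0;
  //   auto l = [&x]() { return x; };
  //   #pragma omp target map(to: l)
  //
  // this routine emits an extra PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT
  // entry for the captured 'this' pointer (if any) and for each by-reference
  // capture (here 'x'), so that the device copy of the lambda object points
  // at the device copies of its captures.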
8692   /// Set correct indices for lambda captures.
8693 void adjustMemberOfForLambdaCaptures(
8694 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8695 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8696 MapFlagsArrayTy &Types) const {
8697 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8698 // Set correct member_of idx for all implicit lambda captures.
8699 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8700 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8701 continue;
8702 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8703 assert(BasePtr && "Unable to find base lambda address.");
8704 int TgtIdx = -1;
8705 for (unsigned J = I; J > 0; --J) {
8706 unsigned Idx = J - 1;
8707 if (Pointers[Idx] != BasePtr)
8708 continue;
8709 TgtIdx = Idx;
8710 break;
8712 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8713 // All other current entries will be MEMBER_OF the combined entry
8714 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8715 // 0xFFFF in the MEMBER_OF field).
8716 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8717 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8721 /// Generate the base pointers, section pointers, sizes, map types, and
8722 /// mappers associated to a given capture (all included in \a CombinedInfo).
8723 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8724 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8725 StructRangeInfoTy &PartialStruct) const {
8726 assert(!Cap->capturesVariableArrayType() &&
8727 "Not expecting to generate map info for a variable array type!");
8729     // We need to know when we are generating information for the first component.
8730 const ValueDecl *VD = Cap->capturesThis()
8731 ? nullptr
8732 : Cap->getCapturedVar()->getCanonicalDecl();
8734     // For map(to: lambda): skip here; it is processed in
8735     // generateDefaultMapInfo.
8736 if (LambdasMap.count(VD))
8737 return;
8739     // If this declaration appears in an is_device_ptr clause, we just have to
8740 // pass the pointer by value. If it is a reference to a declaration, we just
8741 // pass its value.
8742 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8743 CombinedInfo.Exprs.push_back(VD);
8744 CombinedInfo.BasePointers.emplace_back(Arg, VD);
8745 CombinedInfo.Pointers.push_back(Arg);
8746 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8747 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8748 /*isSigned=*/true));
8749 CombinedInfo.Types.push_back(
8750 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
8751 OMP_MAP_TARGET_PARAM);
8752 CombinedInfo.Mappers.push_back(nullptr);
8753 return;
8756 using MapData =
8757 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8758 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8759 const ValueDecl *, const Expr *>;
8760 SmallVector<MapData, 4> DeclComponentLists;
8761     // For member field lists in is_device_ptr clauses, store them in
8762     // DeclComponentLists for generating component info.
8763 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8764 auto It = DevPointersMap.find(VD);
8765 if (It != DevPointersMap.end())
8766 for (const auto &MCL : It->second)
8767 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8768                                      /*IsImplicit=*/true, nullptr,
8769 nullptr);
8770 auto I = HasDevAddrsMap.find(VD);
8771 if (I != HasDevAddrsMap.end())
8772 for (const auto &MCL : I->second)
8773 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8774                                      /*IsImplicit=*/true, nullptr,
8775 nullptr);
8776 assert(CurDir.is<const OMPExecutableDirective *>() &&
8777            "Expect an executable directive");
8778 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8779 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8780 const auto *EI = C->getVarRefs().begin();
8781 for (const auto L : C->decl_component_lists(VD)) {
8782 const ValueDecl *VDecl, *Mapper;
8783         // The expression is not correct if the mapping is implicit.
8784 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8785 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8786 std::tie(VDecl, Components, Mapper) = L;
8787         assert(VDecl == VD && "We got information for the wrong declaration!");
8788 assert(!Components.empty() &&
8789 "Not expecting declaration with no component lists.");
8790 DeclComponentLists.emplace_back(Components, C->getMapType(),
8791 C->getMapTypeModifiers(),
8792 C->isImplicit(), Mapper, E);
8793 ++EI;
8796 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8797 const MapData &RHS) {
8798 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8799 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8800 bool HasPresent =
8801 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8802 bool HasAllocs = MapType == OMPC_MAP_alloc;
8803 MapModifiers = std::get<2>(RHS);
8804 MapType = std::get<1>(LHS);
8805 bool HasPresentR =
8806 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8807 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8808 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
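    // Editor's note (sketch, derived from the comparator above): component
    // lists carrying the 'present' modifier are sorted to the front and lists
    // from 'alloc' maps to the back; e.g. with
    // map(present, to: s.a) map(alloc: s.b) the 'present' list is processed
    // first.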
8811 // Find overlapping elements (including the offset from the base element).
8812 llvm::SmallDenseMap<
8813 const MapData *,
8814 llvm::SmallVector<
8815 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8817 OverlappedData;
8818 size_t Count = 0;
8819 for (const MapData &L : DeclComponentLists) {
8820 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8821 OpenMPMapClauseKind MapType;
8822 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8823 bool IsImplicit;
8824 const ValueDecl *Mapper;
8825 const Expr *VarRef;
8826 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8828 ++Count;
8829 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8830 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8831 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8832 VarRef) = L1;
8833 auto CI = Components.rbegin();
8834 auto CE = Components.rend();
8835 auto SI = Components1.rbegin();
8836 auto SE = Components1.rend();
8837 for (; CI != CE && SI != SE; ++CI, ++SI) {
8838 if (CI->getAssociatedExpression()->getStmtClass() !=
8839 SI->getAssociatedExpression()->getStmtClass())
8840 break;
8841 // Are we dealing with different variables/fields?
8842 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8843 break;
8845           // We found an overlap if, for at least one of the lists, we reached
8846           // the head of the component list.
8847 if (CI == CE || SI == SE) {
8848 // Ignore it if it is the same component.
8849 if (CI == CE && SI == SE)
8850 continue;
8851 const auto It = (SI == SE) ? CI : SI;
8852 // If one component is a pointer and another one is a kind of
8853 // dereference of this pointer (array subscript, section, dereference,
8854           // etc.), it is not an overlap.
8855 // Same, if one component is a base and another component is a
8856 // dereferenced pointer memberexpr with the same base.
8857 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8858 (std::prev(It)->getAssociatedDeclaration() &&
8859 std::prev(It)
8860 ->getAssociatedDeclaration()
8861 ->getType()
8862 ->isPointerType()) ||
8863 (It->getAssociatedDeclaration() &&
8864 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8865 std::next(It) != CE && std::next(It) != SE))
8866 continue;
8867 const MapData &BaseData = CI == CE ? L : L1;
8868 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8869 SI == SE ? Components : Components1;
8870 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8871 OverlappedElements.getSecond().push_back(SubData);
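    // Illustrative example (editor's sketch, not part of the original
    // source): for
    //
    //   struct S { int *p; int a; } s;
    //   #pragma omp target map(tofrom: s) map(tofrom: s.a)
    //
    // the list for 's' reaches its head while matching the list for 's.a',
    // so 's' becomes the base data and the 's.a' list is recorded as one of
    // its overlapped elements; by contrast, s.p and s.p[0] are a pointer and
    // its dereference and are not treated as overlapping.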
8875 // Sort the overlapped elements for each item.
8876 llvm::SmallVector<const FieldDecl *, 4> Layout;
8877 if (!OverlappedData.empty()) {
8878 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8879 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8880 while (BaseType != OrigType) {
8881 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8882 OrigType = BaseType->getPointeeOrArrayElementType();
8885 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8886 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8887 else {
8888 const auto *RD = BaseType->getAsRecordDecl();
8889 Layout.append(RD->field_begin(), RD->field_end());
8892 for (auto &Pair : OverlappedData) {
8893 llvm::stable_sort(
8894 Pair.getSecond(),
8895 [&Layout](
8896 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8897 OMPClauseMappableExprCommon::MappableExprComponentListRef
8898 Second) {
8899 auto CI = First.rbegin();
8900 auto CE = First.rend();
8901 auto SI = Second.rbegin();
8902 auto SE = Second.rend();
8903 for (; CI != CE && SI != SE; ++CI, ++SI) {
8904 if (CI->getAssociatedExpression()->getStmtClass() !=
8905 SI->getAssociatedExpression()->getStmtClass())
8906 break;
8907 // Are we dealing with different variables/fields?
8908 if (CI->getAssociatedDeclaration() !=
8909 SI->getAssociatedDeclaration())
8910 break;
8913 // Lists contain the same elements.
8914 if (CI == CE && SI == SE)
8915 return false;
8917             // A list with fewer elements is less than a list with more elements.
8918 if (CI == CE || SI == SE)
8919 return CI == CE;
8921 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8922 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8923 if (FD1->getParent() == FD2->getParent())
8924 return FD1->getFieldIndex() < FD2->getFieldIndex();
8925 const auto *It =
8926 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8927 return FD == FD1 || FD == FD2;
8929 return *It == FD1;
8933     // The mapping is associated with a capture, because the mapping flags
8934     // depend on it. Go through all of the lists with overlapped elements.
8935 bool IsFirstComponentList = true;
8936 for (const auto &Pair : OverlappedData) {
8937 const MapData &L = *Pair.getFirst();
8938 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8939 OpenMPMapClauseKind MapType;
8940 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8941 bool IsImplicit;
8942 const ValueDecl *Mapper;
8943 const Expr *VarRef;
8944 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8946 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8947 OverlappedComponents = Pair.getSecond();
8948 generateInfoForComponentList(
8949 MapType, MapModifiers, llvm::None, Components, CombinedInfo,
8950 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8951 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8952 IsFirstComponentList = false;
8954 // Go through other elements without overlapped elements.
8955 for (const MapData &L : DeclComponentLists) {
8956 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8957 OpenMPMapClauseKind MapType;
8958 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8959 bool IsImplicit;
8960 const ValueDecl *Mapper;
8961 const Expr *VarRef;
8962 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8964 auto It = OverlappedData.find(&L);
8965 if (It == OverlappedData.end())
8966 generateInfoForComponentList(MapType, MapModifiers, llvm::None,
8967 Components, CombinedInfo, PartialStruct,
8968 IsFirstComponentList, IsImplicit, Mapper,
8969 /*ForDeviceAddr=*/false, VD, VarRef);
8970 IsFirstComponentList = false;
8974 /// Generate the default map information for a given capture \a CI,
8975 /// record field declaration \a RI and captured value \a CV.
8976 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8977 const FieldDecl &RI, llvm::Value *CV,
8978 MapCombinedInfoTy &CombinedInfo) const {
8979 bool IsImplicit = true;
8980 // Do the default mapping.
8981 if (CI.capturesThis()) {
8982 CombinedInfo.Exprs.push_back(nullptr);
8983 CombinedInfo.BasePointers.push_back(CV);
8984 CombinedInfo.Pointers.push_back(CV);
8985 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8986 CombinedInfo.Sizes.push_back(
8987 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8988 CGF.Int64Ty, /*isSigned=*/true));
8989 // Default map type.
8990 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8991 } else if (CI.capturesVariableByCopy()) {
8992 const VarDecl *VD = CI.getCapturedVar();
8993 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8994 CombinedInfo.BasePointers.push_back(CV);
8995 CombinedInfo.Pointers.push_back(CV);
8996 if (!RI.getType()->isAnyPointerType()) {
8997        // We have to signal to the runtime those captures that are passed by
8998        // value and are not pointers.
8999 CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9000 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9001 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9002 } else {
9003        // Pointers are implicitly mapped with a zero size and no flags
9004        // (other than the flags that are added for all implicit maps).
9005 CombinedInfo.Types.push_back(OMP_MAP_NONE);
9006 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9008 auto I = FirstPrivateDecls.find(VD);
9009 if (I != FirstPrivateDecls.end())
9010 IsImplicit = I->getSecond();
9011 } else {
9012 assert(CI.capturesVariable() && "Expected captured reference.");
9013 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9014 QualType ElementType = PtrTy->getPointeeType();
9015 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9016 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9017 // The default map type for a scalar/complex type is 'to' because by
9018 // default the value doesn't have to be retrieved. For an aggregate
9019 // type, the default is 'tofrom'.
9020 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9021 const VarDecl *VD = CI.getCapturedVar();
9022 auto I = FirstPrivateDecls.find(VD);
9023 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9024 CombinedInfo.BasePointers.push_back(CV);
9025 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9026 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9027 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9028 AlignmentSource::Decl));
9029 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9030 } else {
9031 CombinedInfo.Pointers.push_back(CV);
9033 if (I != FirstPrivateDecls.end())
9034 IsImplicit = I->getSecond();
9036 // Every default map produces a single argument which is a target parameter.
9037 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9039 // Add flag stating this is an implicit map.
9040 if (IsImplicit)
9041 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9043 // No user-defined mapper for default mapping.
9044 CombinedInfo.Mappers.push_back(nullptr);
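  // Illustrative example (editor's sketch, not part of the original source):
  // with no explicit map clauses on
  //
  //   int n = 10; int *p = buf;
  //   #pragma omp target
  //   { p[0] = n; }
  //
  // the scalar 'n' is captured by copy and emitted as a LITERAL |
  // TARGET_PARAM | IMPLICIT entry of size sizeof(int), while the pointer 'p'
  // gets a zero-size TARGET_PARAM | IMPLICIT entry.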
9047 } // anonymous namespace
9049 static void emitNonContiguousDescriptor(
9050 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9051 CGOpenMPRuntime::TargetDataInfo &Info) {
9052 CodeGenModule &CGM = CGF.CGM;
9053 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9054 &NonContigInfo = CombinedInfo.NonContigInfo;
9056 // Build an array of struct descriptor_dim and then assign it to
9057 // offload_args.
9059 // struct descriptor_dim {
9060 // uint64_t offset;
9061 // uint64_t count;
9062   //  uint64_t stride;
9063 // };
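  // Illustrative example (editor's sketch): a strided motion such as
  //
  //   #pragma omp target update to(a[0:4:2])
  //
  // produces one descriptor_dim per dimension; here a single dimension with
  // offset 0, count 4, and a stride spanning two elements.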
9064 ASTContext &C = CGF.getContext();
9065 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9066 RecordDecl *RD;
9067 RD = C.buildImplicitRecord("descriptor_dim");
9068 RD->startDefinition();
9069 addFieldToRecordDecl(C, RD, Int64Ty);
9070 addFieldToRecordDecl(C, RD, Int64Ty);
9071 addFieldToRecordDecl(C, RD, Int64Ty);
9072 RD->completeDefinition();
9073 QualType DimTy = C.getRecordType(RD);
9075 enum { OffsetFD = 0, CountFD, StrideFD };
9076   // We need two index variables here since the size of "Dims" is the same as
9077   // the size of Components; however, the sizes of offset, count, and stride
9078   // are equal to the number of non-contiguous base declarations.
9079 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9080     // Skip emitting IR if the dimension size is 1, since it cannot be
9081 // non-contiguous.
9082 if (NonContigInfo.Dims[I] == 1)
9083 continue;
9084 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9085 QualType ArrayTy =
9086 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9087 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9088 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9089 unsigned RevIdx = EE - II - 1;
9090 LValue DimsLVal = CGF.MakeAddrLValue(
9091 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9092 // Offset
9093 LValue OffsetLVal = CGF.EmitLValueForField(
9094 DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9095 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9096 // Count
9097 LValue CountLVal = CGF.EmitLValueForField(
9098 DimsLVal, *std::next(RD->field_begin(), CountFD));
9099 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9100 // Stride
9101 LValue StrideLVal = CGF.EmitLValueForField(
9102 DimsLVal, *std::next(RD->field_begin(), StrideFD));
9103 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9105 // args[I] = &dims
9106 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9107 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9108 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9109 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9110 Info.RTArgs.PointersArray, 0, I);
9111 Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
9112 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9113 ++L;
9117 // Try to extract the base declaration from a `this->x` expression if possible.
9118 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9119 if (!E)
9120 return nullptr;
9122 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9123 if (const MemberExpr *ME =
9124 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9125 return ME->getMemberDecl();
9126 return nullptr;
9129 /// Emit a string constant containing the names of the values mapped to the
9130 /// offloading runtime library.
9131 llvm::Constant *
9132 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9133 MappableExprsHandler::MappingExprInfo &MapExprs) {
9135 uint32_t SrcLocStrSize;
9136 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9137 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9139 SourceLocation Loc;
9140 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9141 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9142 Loc = VD->getLocation();
9143 else
9144 Loc = MapExprs.getMapExpr()->getExprLoc();
9145 } else {
9146 Loc = MapExprs.getMapDecl()->getLocation();
9149 std::string ExprName;
9150 if (MapExprs.getMapExpr()) {
9151 PrintingPolicy P(CGF.getContext().getLangOpts());
9152 llvm::raw_string_ostream OS(ExprName);
9153 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9154 OS.flush();
9155 } else {
9156 ExprName = MapExprs.getMapDecl()->getNameAsString();
9159 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9160 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9161 PLoc.getLine(), PLoc.getColumn(),
9162 SrcLocStrSize);
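// Editor's note (assumption about the string layout, for illustration):
// assuming the ";file;name;line;col;;" format produced by
// getOrCreateSrcLocStr, a clause operand 's.x' written at line 12, column 9
// of file.c would yield a string like ";file.c;s.x;12;9;;" that the
// offloading runtime can print in its diagnostics.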
9165 /// Emit the arrays used to pass the captures and map information to the
9166 /// offloading runtime library. If there is no map or capture information,
9167 /// return nullptr by reference.
9168 static void emitOffloadingArrays(
9169 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9170 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9171 bool IsNonContiguous = false) {
9172 CodeGenModule &CGM = CGF.CGM;
9173 ASTContext &Ctx = CGF.getContext();
9175 // Reset the array information.
9176 Info.clearArrayInfo();
9177 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9179 if (Info.NumberOfPtrs) {
9180 // Detect if we have any capture size requiring runtime evaluation of the
9181    // size, so that a constant array can eventually be used.
9183 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9184 QualType PointerArrayType = Ctx.getConstantArrayType(
9185 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9186 /*IndexTypeQuals=*/0);
9188 Info.RTArgs.BasePointersArray =
9189 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9190 Info.RTArgs.PointersArray =
9191 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9192 Address MappersArray =
9193 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9194 Info.RTArgs.MappersArray = MappersArray.getPointer();
9196 // If we don't have any VLA types or other types that require runtime
9197    // evaluation, we can use a constant array for the map sizes; otherwise we
9198 // need to fill up the arrays as we do for the pointers.
9199 QualType Int64Ty =
9200 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9201 SmallVector<llvm::Constant *> ConstSizes(
9202 CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
9203 llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
9204 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9205 if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
9206 if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
9207 if (IsNonContiguous && (CombinedInfo.Types[I] &
9208 MappableExprsHandler::OMP_MAP_NON_CONTIG))
9209 ConstSizes[I] = llvm::ConstantInt::get(
9210 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
9211 else
9212 ConstSizes[I] = CI;
9213 continue;
9216 RuntimeSizes.set(I);
9219 if (RuntimeSizes.all()) {
9220 QualType SizeArrayType = Ctx.getConstantArrayType(
9221 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9222 /*IndexTypeQuals=*/0);
9223 Info.RTArgs.SizesArray =
9224 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9225 } else {
9226 auto *SizesArrayInit = llvm::ConstantArray::get(
9227 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9228 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9229 auto *SizesArrayGbl = new llvm::GlobalVariable(
9230 CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
9231 llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
9232 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9233 if (RuntimeSizes.any()) {
9234 QualType SizeArrayType = Ctx.getConstantArrayType(
9235 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9236 /*IndexTypeQuals=*/0);
9237 Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
9238 llvm::Value *GblConstPtr =
9239 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9240 SizesArrayGbl, CGM.Int64Ty->getPointerTo());
9241 CGF.Builder.CreateMemCpy(
9242 Buffer,
9243 Address(GblConstPtr, CGM.Int64Ty,
9244 CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
9245 /*DestWidth=*/64, /*Signed=*/false))),
9246 CGF.getTypeSize(SizeArrayType));
9247 Info.RTArgs.SizesArray = Buffer.getPointer();
9248 } else {
9249 Info.RTArgs.SizesArray = SizesArrayGbl;
9253 // The map types are always constant so we don't need to generate code to
9254 // fill arrays. Instead, we create an array constant.
9255 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9256 llvm::copy(CombinedInfo.Types, Mapping.begin());
9257 std::string MaptypesName =
9258 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9259 auto *MapTypesArrayGbl =
9260 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9261 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9263    // The information types are only built if debug information is
9264    // requested.
9265 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9266 Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue(
9267 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9268 } else {
9269 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9270 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9272 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9273 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9274 std::string MapnamesName =
9275 CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9276 auto *MapNamesArrayGbl =
9277 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9278 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9281 // If there's a present map type modifier, it must not be applied to the end
9282 // of a region, so generate a separate map type array in that case.
9283 if (Info.separateBeginEndCalls()) {
9284 bool EndMapTypesDiffer = false;
9285 for (uint64_t &Type : Mapping) {
9286 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9287 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9288 EndMapTypesDiffer = true;
9291 if (EndMapTypesDiffer) {
9292 MapTypesArrayGbl =
9293 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9294 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9298 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9299 llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9300 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9301 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9302 Info.RTArgs.BasePointersArray, 0, I);
9303 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9304 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9305 Address BPAddr(BP, BPVal->getType(),
9306 Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9307 CGF.Builder.CreateStore(BPVal, BPAddr);
9309 if (Info.requiresDevicePointerInfo())
9310 if (const ValueDecl *DevVD =
9311 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9312 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9314 llvm::Value *PVal = CombinedInfo.Pointers[I];
9315 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9316 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9317 Info.RTArgs.PointersArray, 0, I);
9318 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9319 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9320 Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9321 CGF.Builder.CreateStore(PVal, PAddr);
9323 if (RuntimeSizes.test(I)) {
9324 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9325 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9326 Info.RTArgs.SizesArray,
9327 /*Idx0=*/0,
9328 /*Idx1=*/I);
9329 Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
9330 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9331 CGM.Int64Ty,
9332 /*isSigned=*/true),
9333 SAddr);
9336 // Fill up the mapper array.
9337 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9338 if (CombinedInfo.Mappers[I]) {
9339 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9340 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9341 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9342 Info.HasMapper = true;
9344 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9345 CGF.Builder.CreateStore(MFunc, MAddr);
9349 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9350 Info.NumberOfPtrs == 0)
9351 return;
9353 emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
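// Illustrative sketch (editor's addition, not part of the original source):
// for map(tofrom: a, b[0:n]) with a runtime-sized section, the emitted
// locals look roughly like
//
//   void *.offload_baseptrs[2] = { &a, &b };
//   void *.offload_ptrs[2]     = { &a, &b[0] };
//   int64_t .offload_sizes[2]  = { sizeof(a), n * sizeof(b[0]) };
//
// where the sizes array is a stack temporary (initialized from a constant
// global, with the runtime-evaluated entries stored over it), while the map
// types and, under debug info, the map names are emitted as constant
// globals.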
9356 /// Check for inner distribute directive.
9357 static const OMPExecutableDirective *
9358 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9359 const auto *CS = D.getInnermostCapturedStmt();
9360 const auto *Body =
9361 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9362 const Stmt *ChildStmt =
9363 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9365 if (const auto *NestedDir =
9366 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9367 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9368 switch (D.getDirectiveKind()) {
9369 case OMPD_target:
9370 if (isOpenMPDistributeDirective(DKind))
9371 return NestedDir;
9372 if (DKind == OMPD_teams) {
9373 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9374 /*IgnoreCaptured=*/true);
9375 if (!Body)
9376 return nullptr;
9377 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9378 if (const auto *NND =
9379 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9380 DKind = NND->getDirectiveKind();
9381 if (isOpenMPDistributeDirective(DKind))
9382 return NND;
9385 return nullptr;
9386 case OMPD_target_teams:
9387 if (isOpenMPDistributeDirective(DKind))
9388 return NestedDir;
9389 return nullptr;
9390 case OMPD_target_parallel:
9391 case OMPD_target_simd:
9392 case OMPD_target_parallel_for:
9393 case OMPD_target_parallel_for_simd:
9394 return nullptr;
9395 case OMPD_target_teams_distribute:
9396 case OMPD_target_teams_distribute_simd:
9397 case OMPD_target_teams_distribute_parallel_for:
9398 case OMPD_target_teams_distribute_parallel_for_simd:
9399 case OMPD_parallel:
9400 case OMPD_for:
9401 case OMPD_parallel_for:
9402 case OMPD_parallel_master:
9403 case OMPD_parallel_sections:
9404 case OMPD_for_simd:
9405 case OMPD_parallel_for_simd:
9406 case OMPD_cancel:
9407 case OMPD_cancellation_point:
9408 case OMPD_ordered:
9409 case OMPD_threadprivate:
9410 case OMPD_allocate:
9411 case OMPD_task:
9412 case OMPD_simd:
9413 case OMPD_tile:
9414 case OMPD_unroll:
9415 case OMPD_sections:
9416 case OMPD_section:
9417 case OMPD_single:
9418 case OMPD_master:
9419 case OMPD_critical:
9420 case OMPD_taskyield:
9421 case OMPD_barrier:
9422 case OMPD_taskwait:
9423 case OMPD_taskgroup:
9424 case OMPD_atomic:
9425 case OMPD_flush:
9426 case OMPD_depobj:
9427 case OMPD_scan:
9428 case OMPD_teams:
9429 case OMPD_target_data:
9430 case OMPD_target_exit_data:
9431 case OMPD_target_enter_data:
9432 case OMPD_distribute:
9433 case OMPD_distribute_simd:
9434 case OMPD_distribute_parallel_for:
9435 case OMPD_distribute_parallel_for_simd:
9436 case OMPD_teams_distribute:
9437 case OMPD_teams_distribute_simd:
9438 case OMPD_teams_distribute_parallel_for:
9439 case OMPD_teams_distribute_parallel_for_simd:
9440 case OMPD_target_update:
9441 case OMPD_declare_simd:
9442 case OMPD_declare_variant:
9443 case OMPD_begin_declare_variant:
9444 case OMPD_end_declare_variant:
9445 case OMPD_declare_target:
9446 case OMPD_end_declare_target:
9447 case OMPD_declare_reduction:
9448 case OMPD_declare_mapper:
9449 case OMPD_taskloop:
9450 case OMPD_taskloop_simd:
9451 case OMPD_master_taskloop:
9452 case OMPD_master_taskloop_simd:
9453 case OMPD_parallel_master_taskloop:
9454 case OMPD_parallel_master_taskloop_simd:
9455 case OMPD_requires:
9456 case OMPD_metadirective:
9457 case OMPD_unknown:
9458 default:
9459 llvm_unreachable("Unexpected directive.");
9463 return nullptr;
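// Illustrative example (editor's sketch): for
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int i = 0; i < N; ++i) ...
//
// the nested 'distribute parallel for' directive is returned; combined forms
// such as 'target teams distribute' carry the distribute part themselves and
// are handled by the caller without this lookup.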
9466 /// Emit the user-defined mapper function. The code generation follows the
9467 /// pattern in the example below.
9468 /// \code
9469 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9470 /// void *base, void *begin,
9471 /// int64_t size, int64_t type,
9472 /// void *name = nullptr) {
9473 /// // Allocate space for an array section first or add a base/begin for
9474 /// // pointer dereference.
9475 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9476 /// !maptype.IsDelete)
9477 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9478 /// size*sizeof(Ty), clearToFromMember(type));
9479 /// // Map members.
9480 /// for (unsigned i = 0; i < size; i++) {
9481 /// // For each component specified by this mapper:
9482 /// for (auto c : begin[i]->all_components) {
9483 /// if (c.hasMapper())
9484 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9485 /// c.arg_type, c.arg_name);
9486 /// else
9487 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9488 /// c.arg_begin, c.arg_size, c.arg_type,
9489 /// c.arg_name);
9490 /// }
9491 /// }
9492 /// // Delete the array section.
9493 /// if (size > 1 && maptype.IsDelete)
9494 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9495 /// size*sizeof(Ty), clearToFromMember(type));
9496 /// }
9497 /// \endcode
9498 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9499 CodeGenFunction *CGF) {
9500 if (UDMMap.count(D) > 0)
9501 return;
9502 ASTContext &C = CGM.getContext();
9503 QualType Ty = D->getType();
9504 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9505 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9506 auto *MapperVarDecl =
9507 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9508 SourceLocation Loc = D->getLocation();
9509 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9510 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9512 // Prepare mapper function arguments and attributes.
9513 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9514 C.VoidPtrTy, ImplicitParamDecl::Other);
9515 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9516 ImplicitParamDecl::Other);
9517 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9518 C.VoidPtrTy, ImplicitParamDecl::Other);
9519 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9520 ImplicitParamDecl::Other);
9521 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9522 ImplicitParamDecl::Other);
9523 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9524 ImplicitParamDecl::Other);
9525 FunctionArgList Args;
9526 Args.push_back(&HandleArg);
9527 Args.push_back(&BaseArg);
9528 Args.push_back(&BeginArg);
9529 Args.push_back(&SizeArg);
9530 Args.push_back(&TypeArg);
9531 Args.push_back(&NameArg);
9532 const CGFunctionInfo &FnInfo =
9533 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9534 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9535 SmallString<64> TyStr;
9536 llvm::raw_svector_ostream Out(TyStr);
9537 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9538 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9539 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9540 Name, &CGM.getModule());
9541 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9542 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9543 // Start the mapper function code generation.
9544 CodeGenFunction MapperCGF(CGM);
9545 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9546   // Compute the start and end addresses of the array elements.
9547 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9548 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9549 C.getPointerType(Int64Ty), Loc);
9550   // Prepare common arguments for array initialization and deletion.
9551 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9552 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9553 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9554 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9555 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9556 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9557 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9558 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9559 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9560 // Convert the size in bytes into the number of array elements.
9561 Size = MapperCGF.Builder.CreateExactUDiv(
9562 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9563 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9564 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9565 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9566 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9567 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9568 C.getPointerType(Int64Ty), Loc);
9569 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9570 MapperCGF.GetAddrOfLocalVar(&NameArg),
9571 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9573   // Emit array initialization if this is an array section and \p MapType
9574   // indicates that memory allocation is required.
9575 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9576 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9577 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9579   // Emit a for loop to iterate through SizeArg elements and map all of them.
9581 // Emit the loop header block.
9582 MapperCGF.EmitBlock(HeadBB);
9583 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9584 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9585 // Evaluate whether the initial condition is satisfied.
9586 llvm::Value *IsEmpty =
9587 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9588 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9589 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9591 // Emit the loop body block.
9592 MapperCGF.EmitBlock(BodyBB);
9593 llvm::BasicBlock *LastBB = BodyBB;
9594 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9595 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9596 PtrPHI->addIncoming(PtrBegin, EntryBB);
9597 Address PtrCurrent(PtrPHI, ElemTy,
9598 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9599 .getAlignment()
9600 .alignmentOfArrayElement(ElementSize));
9601   // Privatize the mapper's declared variable to be the current array element.
9602 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9603 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9604 (void)Scope.Privatize();
9606 // Get map clause information. Fill up the arrays with all mapped variables.
9607 MappableExprsHandler::MapCombinedInfoTy Info;
9608 MappableExprsHandler MEHandler(*D, MapperCGF);
9609 MEHandler.generateAllInfoForMapper(Info);
9611 // Call the runtime API __tgt_mapper_num_components to get the number of
9612 // pre-existing components.
9613 llvm::Value *OffloadingArgs[] = {Handle};
9614 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9615 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9616 OMPRTL___tgt_mapper_num_components),
9617 OffloadingArgs);
9618 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9619 PreviousSize,
9620 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9622 // Fill up the runtime mapper handle for all components.
9623 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9624 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9625 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9626 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9627 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9628 llvm::Value *CurSizeArg = Info.Sizes[I];
9629 llvm::Value *CurNameArg =
9630 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9631 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9632 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9634 // Extract the MEMBER_OF field from the map type.
9635 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9636 llvm::Value *MemberMapType =
9637 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9639     // Combine the map type inherited from the user-defined mapper with that
9640 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9641 // bits of the \a MapType, which is the input argument of the mapper
9642 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9643 // bits of MemberMapType.
9644 // [OpenMP 5.0], 1.2.6. map-type decay.
9645 // | alloc | to | from | tofrom | release | delete
9646 // ----------------------------------------------------------
9647 // alloc | alloc | alloc | alloc | alloc | release | delete
9648 // to | alloc | to | alloc | to | release | delete
9649 // from | alloc | alloc | from | from | release | delete
9650 // tofrom | alloc | to | from | tofrom | release | delete
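    // Illustrative example (editor's sketch): if the mapper was declared with
    // map(to: v.data[0:v.len]) and is invoked from a map(from: v) clause,
    // 'to' combined with 'from' decays to 'alloc' for that member, per the
    // table above.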
9651 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9652 MapType,
9653 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9654 MappableExprsHandler::OMP_MAP_FROM));
9655 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9656 llvm::BasicBlock *AllocElseBB =
9657 MapperCGF.createBasicBlock("omp.type.alloc.else");
9658 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9659 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9660 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9661 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9662 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9663 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9664 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9665 MapperCGF.EmitBlock(AllocBB);
9666 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9667 MemberMapType,
9668 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9669 MappableExprsHandler::OMP_MAP_FROM)));
9670 MapperCGF.Builder.CreateBr(EndBB);
9671 MapperCGF.EmitBlock(AllocElseBB);
9672 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9673 LeftToFrom,
9674 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9675 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9676 // In case of to, clear OMP_MAP_FROM.
9677 MapperCGF.EmitBlock(ToBB);
9678 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9679 MemberMapType,
9680 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9681 MapperCGF.Builder.CreateBr(EndBB);
9682 MapperCGF.EmitBlock(ToElseBB);
9683 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9684 LeftToFrom,
9685 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9686 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9687 // In case of from, clear OMP_MAP_TO.
9688 MapperCGF.EmitBlock(FromBB);
9689 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9690 MemberMapType,
9691 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9692 // In case of tofrom, do nothing.
9693 MapperCGF.EmitBlock(EndBB);
9694 LastBB = EndBB;
9695 llvm::PHINode *CurMapType =
9696 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9697 CurMapType->addIncoming(AllocMapType, AllocBB);
9698 CurMapType->addIncoming(ToMapType, ToBB);
9699 CurMapType->addIncoming(FromMapType, FromBB);
9700 CurMapType->addIncoming(MemberMapType, ToElseBB);
9702 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9703 CurSizeArg, CurMapType, CurNameArg};
9704 if (Info.Mappers[I]) {
9705 // Call the corresponding mapper function.
9706 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9707 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9708       assert(MapperFunc && "Expect a valid mapper function to be available.");
9709 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9710 } else {
9711 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9712 // data structure.
9713 MapperCGF.EmitRuntimeCall(
9714 OMPBuilder.getOrCreateRuntimeFunction(
9715 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9716 OffloadingArgs);
9720 // Update the pointer to point to the next element that needs to be mapped,
9721 // and check whether we have mapped all elements.
9722 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9723 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9724 PtrPHI->addIncoming(PtrNext, LastBB);
9725 llvm::Value *IsDone =
9726 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9727 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9728 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9730 MapperCGF.EmitBlock(ExitBB);
9731 // Emit array deletion if this is an array section and \p MapType indicates
9732 // that deletion is required.
9733 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9734 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9736 // Emit the function exit block.
9737 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9738 MapperCGF.FinishFunction();
9739 UDMMap.try_emplace(D, Fn);
9740 if (CGF) {
9741 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9742 Decls.second.push_back(D);
9746 /// Emit the array initialization or deletion portion for user-defined mapper
9747 /// code generation. First, it evaluates whether an array section is mapped and
9748 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9749 /// true, and \a MapType indicates to not delete this array, array
9750 /// initialization code is generated. If \a IsInit is false, and \a MapType
9751 /// indicates to delete this array, array deletion code is generated.
9752 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9753 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9754 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9755 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9756 bool IsInit) {
9757 StringRef Prefix = IsInit ? ".init" : ".del";
9759 // Evaluate if this is an array section.
9760 llvm::BasicBlock *BodyBB =
9761 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9762 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9763 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9764 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9765 MapType,
9766 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9767 llvm::Value *DeleteCond;
9768 llvm::Value *Cond;
9769 if (IsInit) {
9770 // base != begin?
9771 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9772 // IsPtrAndObj?
9773 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9774 MapType,
9775 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
9776 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9777 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9778 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9779 DeleteCond = MapperCGF.Builder.CreateIsNull(
9780 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9781 } else {
9782 Cond = IsArray;
9783 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9784 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9786 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9787 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9789 MapperCGF.EmitBlock(BodyBB);
9790 // Get the array size by multiplying element size and element number (i.e., \p
9791 // Size).
9792 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9793 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9794 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that the runtime
9795 // performs memory allocation/deletion only.
9796 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9797 MapType,
9798 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9799 MappableExprsHandler::OMP_MAP_FROM)));
9800 MapTypeArg = MapperCGF.Builder.CreateOr(
9801 MapTypeArg,
9802 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
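// For instance, an incoming map type of (OMP_MAP_TO | OMP_MAP_FROM) is
// reduced here to just OMP_MAP_IMPLICIT, so the runtime call below only
// allocates or deletes storage and performs no data transfer.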
9804 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9805 // data structure.
9806 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9807 ArraySize, MapTypeArg, MapName};
9808 MapperCGF.EmitRuntimeCall(
9809 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9810 OMPRTL___tgt_push_mapper_component),
9811 OffloadingArgs);
9814 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9815 const OMPDeclareMapperDecl *D) {
9816 auto I = UDMMap.find(D);
9817 if (I != UDMMap.end())
9818 return I->second;
9819 emitUserDefinedMapper(D);
9820 return UDMMap.lookup(D);
9823 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9824 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9825 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9826 const OMPLoopDirective &D)>
9827 SizeEmitter) {
9828 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9829 const OMPExecutableDirective *TD = &D;
9830 // Get nested teams distribute kind directive, if any.
9831 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9832 TD = getNestedDistributeDirective(CGM.getContext(), D);
9833 if (!TD)
9834 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9836 const auto *LD = cast<OMPLoopDirective>(TD);
9837 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9838 return NumIterations;
9839 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9842 void CGOpenMPRuntime::emitTargetCall(
9843 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9844 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9845 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9846 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9847 const OMPLoopDirective &D)>
9848 SizeEmitter) {
9849 if (!CGF.HaveInsertPoint())
9850 return;
9852 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
9853 CGM.getLangOpts().OpenMPOffloadMandatory;
9855 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9857 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9858 D.hasClausesOfKind<OMPNowaitClause>() ||
9859 D.hasClausesOfKind<OMPInReductionClause>();
9860 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9861 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9862 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9863 PrePostActionTy &) {
9864 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9866 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9868 CodeGenFunction::OMPTargetDataInfo InputInfo;
9869 llvm::Value *MapTypesArray = nullptr;
9870 llvm::Value *MapNamesArray = nullptr;
9871 // Generate code for the host fallback function.
9872 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
9873 &CS, OffloadingMandatory](CodeGenFunction &CGF) {
9874 if (OffloadingMandatory) {
9875 CGF.Builder.CreateUnreachable();
9876 } else {
9877 if (RequiresOuterTask) {
9878 CapturedVars.clear();
9879 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9881 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9884 // Fill up the pointer arrays and transfer execution to the device.
9885 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
9886 &MapNamesArray, SizeEmitter,
9887 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
9888 if (Device.getInt() == OMPC_DEVICE_ancestor) {
9889 // Reverse offloading is not supported, so just execute on the host.
9890 FallbackGen(CGF);
9891 return;
9894 // On top of the arrays that were filled up, the target offloading call
9895 // takes as arguments the device id as well as the host pointer. The host
9896 // pointer is used by the runtime library to identify the current target
9897 // region, so it only has to be unique and not necessarily point to
9898 // anything. It could be the pointer to the outlined function that
9899 // implements the target region, but we aren't using that so that the
9900 // compiler doesn't need to keep it around and can therefore inline the host
9901 // function if proven worthwhile during optimization.
9903 // From this point on, we need to have an ID of the target region defined.
9904 assert(OutlinedFnID && "Invalid outlined function ID!");
9905 (void)OutlinedFnID;
9907 // Emit device ID if any.
9908 llvm::Value *DeviceID;
9909 if (Device.getPointer()) {
9910 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9911 Device.getInt() == OMPC_DEVICE_device_num) &&
9912 "Expected device_num modifier.");
9913 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9914 DeviceID =
9915 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9916 } else {
9917 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9920 // Emit the number of elements in the offloading arrays.
9921 llvm::Value *PointerNum =
9922 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9924 // Return value of the runtime offloading call.
9925 llvm::Value *Return;
9927 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9928 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9930 // Source location for the ident struct
9931 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9933 // Get tripcount for the target loop-based directive.
9934 llvm::Value *NumIterations =
9935 emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9937 // Arguments for the target kernel.
9938 SmallVector<llvm::Value *> KernelArgs{
9939 CGF.Builder.getInt32(/* Version */ 1),
9940 PointerNum,
9941 InputInfo.BasePointersArray.getPointer(),
9942 InputInfo.PointersArray.getPointer(),
9943 InputInfo.SizesArray.getPointer(),
9944 MapTypesArray,
9945 MapNamesArray,
9946 InputInfo.MappersArray.getPointer(),
9947 NumIterations};
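// A sketch of what this array encodes (argument names illustrative): the
// runtime receives {version, item count, base pointers, pointers, sizes,
// map types, map names, mappers, trip count}, in the order built above.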
9949 // Arguments passed to the 'nowait' variant.
9950 SmallVector<llvm::Value *> NoWaitKernelArgs{
9951 CGF.Builder.getInt32(0),
9952 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
9953 CGF.Builder.getInt32(0),
9954 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
9957 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9959 // The target region is an outlined function launched by the runtime
9960 // via calls to __tgt_target_kernel().
9962 // Note that on the host and CPU targets, the runtime implementation of
9963 // these calls simply call the outlined function without forking threads.
9964 // The outlined functions themselves have runtime calls to
9965 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9966 // the compiler in emitTeamsCall() and emitParallelCall().
9968 // In contrast, on the NVPTX target, the implementation of
9969 // __tgt_target_teams() launches a GPU kernel with the requested number
9970 // of teams and threads so no additional calls to the runtime are required.
9971 // Check the error code and execute the host version if required.
9972 CGF.Builder.restoreIP(
9973 HasNoWait ? OMPBuilder.emitTargetKernel(
9974 CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
9975 NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
9976 : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
9977 DeviceID, NumTeams, NumThreads,
9978 OutlinedFnID, KernelArgs));
9980 llvm::BasicBlock *OffloadFailedBlock =
9981 CGF.createBasicBlock("omp_offload.failed");
9982 llvm::BasicBlock *OffloadContBlock =
9983 CGF.createBasicBlock("omp_offload.cont");
9984 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9985 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9987 CGF.EmitBlock(OffloadFailedBlock);
9988 FallbackGen(CGF);
9990 CGF.EmitBranch(OffloadContBlock);
9992 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9995 // Notify that the host version must be executed.
9996 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
9997 FallbackGen(CGF);
10000 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10001 &MapNamesArray, &CapturedVars, RequiresOuterTask,
10002 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10003 // Fill up the arrays with all the captured variables.
10004 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10006 // Get mappable expression information.
10007 MappableExprsHandler MEHandler(D, CGF);
10008 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10009 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10011 auto RI = CS.getCapturedRecordDecl()->field_begin();
10012 auto *CV = CapturedVars.begin();
10013 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10014 CE = CS.capture_end();
10015 CI != CE; ++CI, ++RI, ++CV) {
10016 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10017 MappableExprsHandler::StructRangeInfoTy PartialStruct;
10019 // VLA sizes are passed to the outlined region by copy and do not have map
10020 // information associated.
10021 if (CI->capturesVariableArrayType()) {
10022 CurInfo.Exprs.push_back(nullptr);
10023 CurInfo.BasePointers.push_back(*CV);
10024 CurInfo.Pointers.push_back(*CV);
10025 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10026 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10027 // Copy to the device as an argument. No need to retrieve it.
10028 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10029 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10030 MappableExprsHandler::OMP_MAP_IMPLICIT);
10031 CurInfo.Mappers.push_back(nullptr);
10032 } else {
10033 // If we have any information in the map clause, we use it, otherwise we
10034 // just do a default mapping.
10035 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10036 if (!CI->capturesThis())
10037 MappedVarSet.insert(CI->getCapturedVar());
10038 else
10039 MappedVarSet.insert(nullptr);
10040 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10041 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10042 // Generate correct mapping for variables captured by reference in
10043 // lambdas.
10044 if (CI->capturesVariable())
10045 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10046 CurInfo, LambdaPointers);
10048 // We expect to have at least an element of information for this capture.
10049 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10050 "Non-existing map pointer for capture!");
10051 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10052 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10053 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10054 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10055 "Inconsistent map information sizes!");
10057 // If there is an entry in PartialStruct it means we have a struct with
10058 // individual members mapped. Emit an extra combined entry.
10059 if (PartialStruct.Base.isValid()) {
10060 CombinedInfo.append(PartialStruct.PreliminaryMapData);
10061 MEHandler.emitCombinedEntry(
10062 CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10063 !PartialStruct.PreliminaryMapData.BasePointers.empty());
10066 // We need to append the results of this capture to what we already have.
10067 CombinedInfo.append(CurInfo);
10069 // Adjust MEMBER_OF flags for the lambda captures.
10070 MEHandler.adjustMemberOfForLambdaCaptures(
10071 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10072 CombinedInfo.Types);
10073 // Map any list items in a map clause that were not captured because they
10074 // weren't referenced within the construct.
10075 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10077 CGOpenMPRuntime::TargetDataInfo Info;
10078 // Fill up the arrays and create the arguments.
10079 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10080 bool EmitDebug =
10081 CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
10082 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10083 EmitDebug,
10084 /*ForEndCall=*/false);
10086 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10087 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10088 CGF.VoidPtrTy, CGM.getPointerAlign());
10089 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10090 CGM.getPointerAlign());
10091 InputInfo.SizesArray =
10092 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10093 InputInfo.MappersArray =
10094 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10095 MapTypesArray = Info.RTArgs.MapTypesArray;
10096 MapNamesArray = Info.RTArgs.MapNamesArray;
10097 if (RequiresOuterTask)
10098 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10099 else
10100 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10103 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10104 CodeGenFunction &CGF, PrePostActionTy &) {
10105 if (RequiresOuterTask) {
10106 CodeGenFunction::OMPTargetDataInfo InputInfo;
10107 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10108 } else {
10109 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10113 // If we have a target function ID, it means that we need to support
10114 // offloading; otherwise, just execute on the host. We need to execute on the
10115 // host regardless of the conditional in the if clause if, e.g., the user does
10116 // not specify target triples.
10117 if (OutlinedFnID) {
10118 if (IfCond) {
10119 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10120 } else {
10121 RegionCodeGenTy ThenRCG(TargetThenGen);
10122 ThenRCG(CGF);
10124 } else {
10125 RegionCodeGenTy ElseRCG(TargetElseGen);
10126 ElseRCG(CGF);
10130 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10131 StringRef ParentName) {
10132 if (!S)
10133 return;
10135 // Codegen OMP target directives that offload compute to the device.
10136 bool RequiresDeviceCodegen =
10137 isa<OMPExecutableDirective>(S) &&
10138 isOpenMPTargetExecutionDirective(
10139 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10141 if (RequiresDeviceCodegen) {
10142 const auto &E = *cast<OMPExecutableDirective>(S);
10143 auto EntryInfo =
10144 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName);
10146 // Is this a target region that should not be emitted as an entry point? If
10147 // so just signal we are done with this target region.
10148 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo))
10149 return;
10151 switch (E.getDirectiveKind()) {
10152 case OMPD_target:
10153 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10154 cast<OMPTargetDirective>(E));
10155 break;
10156 case OMPD_target_parallel:
10157 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10158 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10159 break;
10160 case OMPD_target_teams:
10161 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10162 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10163 break;
10164 case OMPD_target_teams_distribute:
10165 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10166 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10167 break;
10168 case OMPD_target_teams_distribute_simd:
10169 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10170 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10171 break;
10172 case OMPD_target_parallel_for:
10173 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10174 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10175 break;
10176 case OMPD_target_parallel_for_simd:
10177 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10178 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10179 break;
10180 case OMPD_target_simd:
10181 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10182 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10183 break;
10184 case OMPD_target_teams_distribute_parallel_for:
10185 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10186 CGM, ParentName,
10187 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10188 break;
10189 case OMPD_target_teams_distribute_parallel_for_simd:
10190 CodeGenFunction::
10191 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10192 CGM, ParentName,
10193 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10194 break;
10195 case OMPD_parallel:
10196 case OMPD_for:
10197 case OMPD_parallel_for:
10198 case OMPD_parallel_master:
10199 case OMPD_parallel_sections:
10200 case OMPD_for_simd:
10201 case OMPD_parallel_for_simd:
10202 case OMPD_cancel:
10203 case OMPD_cancellation_point:
10204 case OMPD_ordered:
10205 case OMPD_threadprivate:
10206 case OMPD_allocate:
10207 case OMPD_task:
10208 case OMPD_simd:
10209 case OMPD_tile:
10210 case OMPD_unroll:
10211 case OMPD_sections:
10212 case OMPD_section:
10213 case OMPD_single:
10214 case OMPD_master:
10215 case OMPD_critical:
10216 case OMPD_taskyield:
10217 case OMPD_barrier:
10218 case OMPD_taskwait:
10219 case OMPD_taskgroup:
10220 case OMPD_atomic:
10221 case OMPD_flush:
10222 case OMPD_depobj:
10223 case OMPD_scan:
10224 case OMPD_teams:
10225 case OMPD_target_data:
10226 case OMPD_target_exit_data:
10227 case OMPD_target_enter_data:
10228 case OMPD_distribute:
10229 case OMPD_distribute_simd:
10230 case OMPD_distribute_parallel_for:
10231 case OMPD_distribute_parallel_for_simd:
10232 case OMPD_teams_distribute:
10233 case OMPD_teams_distribute_simd:
10234 case OMPD_teams_distribute_parallel_for:
10235 case OMPD_teams_distribute_parallel_for_simd:
10236 case OMPD_target_update:
10237 case OMPD_declare_simd:
10238 case OMPD_declare_variant:
10239 case OMPD_begin_declare_variant:
10240 case OMPD_end_declare_variant:
10241 case OMPD_declare_target:
10242 case OMPD_end_declare_target:
10243 case OMPD_declare_reduction:
10244 case OMPD_declare_mapper:
10245 case OMPD_taskloop:
10246 case OMPD_taskloop_simd:
10247 case OMPD_master_taskloop:
10248 case OMPD_master_taskloop_simd:
10249 case OMPD_parallel_master_taskloop:
10250 case OMPD_parallel_master_taskloop_simd:
10251 case OMPD_requires:
10252 case OMPD_metadirective:
10253 case OMPD_unknown:
10254 default:
10255 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10257 return;
10260 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10261 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10262 return;
10264 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10265 return;
10268 // If this is a lambda function, look into its body.
10269 if (const auto *L = dyn_cast<LambdaExpr>(S))
10270 S = L->getBody();
10272 // Keep looking for target regions recursively.
10273 for (const Stmt *II : S->children())
10274 scanForTargetRegionsFunctions(II, ParentName);
10277 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10278 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10279 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10280 if (!DevTy)
10281 return false;
10282 // Do not emit device_type(nohost) functions for the host.
10283 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10284 return true;
10285 // Do not emit device_type(host) functions for the device.
10286 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10287 return true;
10288 return false;
10291 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10292 // If emitting code for the host, we do not process FD here. Instead we do
10293 // the normal code generation.
10294 if (!CGM.getLangOpts().OpenMPIsDevice) {
10295 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10296 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10297 CGM.getLangOpts().OpenMPIsDevice))
10298 return true;
10299 return false;
10302 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10303 // Try to detect target regions in the function.
10304 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10305 StringRef Name = CGM.getMangledName(GD);
10306 scanForTargetRegionsFunctions(FD->getBody(), Name);
10307 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10308 CGM.getLangOpts().OpenMPIsDevice))
10309 return true;
10312 // Do not emit the function if it is not marked as declare target.
10313 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10314 AlreadyEmittedTargetDecls.count(VD) == 0;
10317 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10318 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10319 CGM.getLangOpts().OpenMPIsDevice))
10320 return true;
10322 if (!CGM.getLangOpts().OpenMPIsDevice)
10323 return false;
10325 // Check if there are Ctors/Dtors in this declaration and look for target
10326 // regions in it. We use the complete variant to produce the kernel name
10327 // mangling.
10328 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10329 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10330 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10331 StringRef ParentName =
10332 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10333 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10335 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10336 StringRef ParentName =
10337 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10338 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10342 // Do not emit the variable if it is not marked as declare target.
10343 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10344 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10345 cast<VarDecl>(GD.getDecl()));
10346 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10347 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10348 HasRequiresUnifiedSharedMemory)) {
10349 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10350 return true;
10352 return false;
10355 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10356 llvm::Constant *Addr) {
10357 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10358 !CGM.getLangOpts().OpenMPIsDevice)
10359 return;
10361 // If we have host/nohost variables, they do not need to be registered.
10362 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10363 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10364 if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
10365 return;
10367 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10368 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10369 if (!Res) {
10370 if (CGM.getLangOpts().OpenMPIsDevice) {
10371 // Register non-target variables being emitted in device code (debug info
10372 // may cause this).
10373 StringRef VarName = CGM.getMangledName(VD);
10374 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10376 return;
10378 // Register declare target variables.
10379 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10380 StringRef VarName;
10381 int64_t VarSize;
10382 llvm::GlobalValue::LinkageTypes Linkage;
10384 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10385 !HasRequiresUnifiedSharedMemory) {
10386 Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10387 VarName = CGM.getMangledName(VD);
10388 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10389 VarSize =
10390 CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity();
10391 assert(VarSize != 0 && "Expected non-zero size of the variable");
10392 } else {
10393 VarSize = 0;
10395 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10396 // Temporary solution to prevent optimization of the internal variables.
10397 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10398 // Do not create a "ref-variable" if the original is not also available
10399 // on the host.
10400 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10401 return;
10402 std::string RefName = getName({VarName, "ref"});
10403 if (!CGM.GetGlobalValue(RefName)) {
10404 llvm::Constant *AddrRef =
10405 getOrCreateInternalVariable(Addr->getType(), RefName);
10406 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10407 GVAddrRef->setConstant(/*Val=*/true);
10408 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10409 GVAddrRef->setInitializer(Addr);
10410 CGM.addCompilerUsedGlobal(GVAddrRef);
10413 } else {
10414 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10415 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10416 HasRequiresUnifiedSharedMemory)) &&
10417 "Declare target attribute must link or to with unified memory.");
10418 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10419 Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10420 else
10421 Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10423 if (CGM.getLangOpts().OpenMPIsDevice) {
10424 VarName = Addr->getName();
10425 Addr = nullptr;
10426 } else {
10427 VarName = getAddrOfDeclareTargetVar(VD).getName();
10428 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10430 VarSize = CGM.getPointerSize().getQuantity();
10431 Linkage = llvm::GlobalValue::WeakAnyLinkage;
10434 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10435 VarName, Addr, VarSize, Flags, Linkage, CGM.getLangOpts().OpenMPIsDevice);
10438 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10439 if (isa<FunctionDecl>(GD.getDecl()) ||
10440 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10441 return emitTargetFunctions(GD);
10443 return emitTargetGlobalVariable(GD);
10446 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10447 for (const VarDecl *VD : DeferredGlobalVariables) {
10448 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10449 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10450 if (!Res)
10451 continue;
10452 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10453 !HasRequiresUnifiedSharedMemory) {
10454 CGM.EmitGlobal(VD);
10455 } else {
10456 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10457 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10458 HasRequiresUnifiedSharedMemory)) &&
10459 "Expected link clause or to clause with unified memory.");
10460 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10465 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10466 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10467 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10468 " Expected target-based directive.");
10471 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10472 for (const OMPClause *Clause : D->clauselists()) {
10473 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10474 HasRequiresUnifiedSharedMemory = true;
10475 } else if (const auto *AC =
10476 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10477 switch (AC->getAtomicDefaultMemOrderKind()) {
10478 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10479 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10480 break;
10481 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10482 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10483 break;
10484 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10485 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10486 break;
10487 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10488 break;
10494 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10495 return RequiresAtomicOrdering;
10498 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10499 LangAS &AS) {
10500 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10501 return false;
10502 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10503 switch (A->getAllocatorType()) {
10504 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10505 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10506 // Not supported, fallback to the default mem space.
10507 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10508 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10509 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10510 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10511 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10512 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10513 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10514 AS = LangAS::Default;
10515 return true;
10516 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10517 llvm_unreachable("Expected predefined allocator for the variables with the "
10518 "static storage.");
10520 return false;
10523 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10524 return HasRequiresUnifiedSharedMemory;
10527 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10528 CodeGenModule &CGM)
10529 : CGM(CGM) {
10530 if (CGM.getLangOpts().OpenMPIsDevice) {
10531 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10532 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10536 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10537 if (CGM.getLangOpts().OpenMPIsDevice)
10538 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10541 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10542 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10543 return true;
10545 const auto *D = cast<FunctionDecl>(GD.getDecl());
10546 // Do not emit the function if it is marked as declare target, as it was
10547 // already emitted.
10548 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10549 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10550 if (auto *F = dyn_cast_or_null<llvm::Function>(
10551 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10552 return !F->isDeclaration();
10553 return false;
10555 return true;
10558 return !AlreadyEmittedTargetDecls.insert(D).second;
10561 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10562 // If we don't have entries or if we are emitting code for the device, we
10563 // don't need to do anything.
10564 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10565 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10566 (OffloadEntriesInfoManager.empty() &&
10567 !HasEmittedDeclareTargetRegion &&
10568 !HasEmittedTargetRegion))
10569 return nullptr;
10571 // Create and register the function that handles the requires directives.
10572 ASTContext &C = CGM.getContext();
10574 llvm::Function *RequiresRegFn;
10576 CodeGenFunction CGF(CGM);
10577 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10578 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10579 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10580 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10581 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10582 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10583 // TODO: check for other requires clauses.
10584 // The requires directive takes effect only when a target region is
10585 // present in the compilation unit. Otherwise it is ignored and not
10586 // passed to the runtime. This prevents the runtime from throwing an error
10587 // for mismatched requires clauses across compilation units that don't
10588 // contain at least one target region.
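// As a sketch (exact symbol names may differ), the generated constructor
// reduces to:
//   void omp_offloading.requires_reg() {
//     __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY /* or OMP_REQ_NONE */);
//   }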
10589 assert((HasEmittedTargetRegion ||
10590 HasEmittedDeclareTargetRegion ||
10591 !OffloadEntriesInfoManager.empty()) &&
10592 "Target or declare target region expected.");
10593 if (HasRequiresUnifiedSharedMemory)
10594 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10595 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10596 CGM.getModule(), OMPRTL___tgt_register_requires),
10597 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10598 CGF.FinishFunction();
10600 return RequiresRegFn;
10603 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10604 const OMPExecutableDirective &D,
10605 SourceLocation Loc,
10606 llvm::Function *OutlinedFn,
10607 ArrayRef<llvm::Value *> CapturedVars) {
10608 if (!CGF.HaveInsertPoint())
10609 return;
10611 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10612 CodeGenFunction::RunCleanupsScope Scope(CGF);
10614 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10615 llvm::Value *Args[] = {
10616 RTLoc,
10617 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10618 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10619 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10620 RealArgs.append(std::begin(Args), std::end(Args));
10621 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10623 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10624 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10625 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10628 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10629 const Expr *NumTeams,
10630 const Expr *ThreadLimit,
10631 SourceLocation Loc) {
10632 if (!CGF.HaveInsertPoint())
10633 return;
10635 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10637 llvm::Value *NumTeamsVal =
10638 NumTeams
10639 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10640 CGF.CGM.Int32Ty, /* isSigned = */ true)
10641 : CGF.Builder.getInt32(0);
10643 llvm::Value *ThreadLimitVal =
10644 ThreadLimit
10645 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10646 CGF.CGM.Int32Ty, /* isSigned = */ true)
10647 : CGF.Builder.getInt32(0);
10649 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10650 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10651 ThreadLimitVal};
10652 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10653 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10654 PushNumTeamsArgs);
10657 void CGOpenMPRuntime::emitTargetDataCalls(
10658 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10659 const Expr *Device, const RegionCodeGenTy &CodeGen,
10660 CGOpenMPRuntime::TargetDataInfo &Info) {
10661 if (!CGF.HaveInsertPoint())
10662 return;
10664 // Action used to replace the default codegen action and turn privatization
10665 // off.
10666 PrePostActionTy NoPrivAction;
10668 // Generate the code for the opening of the data environment. Capture all the
10669 // arguments of the runtime call by reference because they are used in the
10670 // closing of the region.
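// Conceptually, for a '#pragma omp target data map(tofrom: a)' region the
// emitted sequence is (sketch):
//   __tgt_target_data_begin_mapper(loc, dev, 1, &base, &ptr, &size, &type, &name, &mapper);
//   <region body>
//   __tgt_target_data_end_mapper(loc, dev, 1, &base, &ptr, &size, &type, &name, &mapper);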
10671 auto &&BeginThenGen = [this, &D, Device, &Info,
10672 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10673 // Fill up the arrays with all the mapped variables.
10674 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10676 // Get map clause information.
10677 MappableExprsHandler MEHandler(D, CGF);
10678 MEHandler.generateAllInfo(CombinedInfo);
10680 // Fill up the arrays and create the arguments.
10681 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10682 /*IsNonContiguous=*/true);
10684 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
10685 bool EmitDebug =
10686 CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
10687 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
10688 EmitDebug);
10690 // Emit device ID if any.
10691 llvm::Value *DeviceID = nullptr;
10692 if (Device) {
10693 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10694 CGF.Int64Ty, /*isSigned=*/true);
10695 } else {
10696 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10699 // Emit the number of elements in the offloading arrays.
10700 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10702 // Source location for the ident struct
10703 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10705 llvm::Value *OffloadingArgs[] = {RTLoc,
10706 DeviceID,
10707 PointerNum,
10708 RTArgs.BasePointersArray,
10709 RTArgs.PointersArray,
10710 RTArgs.SizesArray,
10711 RTArgs.MapTypesArray,
10712 RTArgs.MapNamesArray,
10713 RTArgs.MappersArray};
10714 CGF.EmitRuntimeCall(
10715 OMPBuilder.getOrCreateRuntimeFunction(
10716 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10717 OffloadingArgs);
10719 // If device pointer privatization is required, emit the body of the region
10720 // here. It will have to be duplicated: with and without privatization.
10721 if (!Info.CaptureDeviceAddrMap.empty())
10722 CodeGen(CGF);
10725 // Generate code for the closing of the data region.
10726 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
10727 PrePostActionTy &) {
10728 assert(Info.isValid() && "Invalid data environment closing arguments.");
10730 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
10731 bool EmitDebug =
10732 CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
10733 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
10734 EmitDebug,
10735 /*ForEndCall=*/true);
10737 // Emit device ID if any.
10738 llvm::Value *DeviceID = nullptr;
10739 if (Device) {
10740 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10741 CGF.Int64Ty, /*isSigned=*/true);
10742 } else {
10743 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10746 // Emit the number of elements in the offloading arrays.
10747 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10749 // Source location for the ident struct
10750 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10752 llvm::Value *OffloadingArgs[] = {RTLoc,
10753 DeviceID,
10754 PointerNum,
10755 RTArgs.BasePointersArray,
10756 RTArgs.PointersArray,
10757 RTArgs.SizesArray,
10758 RTArgs.MapTypesArray,
10759 RTArgs.MapNamesArray,
10760 RTArgs.MappersArray};
10761 CGF.EmitRuntimeCall(
10762 OMPBuilder.getOrCreateRuntimeFunction(
10763 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
10764 OffloadingArgs);
10767 // If we need device pointer privatization, we need to emit the body of the
10768 // region with no privatization in the 'else' branch of the conditional.
10769 // Otherwise, we don't have to do anything.
10770 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10771 PrePostActionTy &) {
10772 if (!Info.CaptureDeviceAddrMap.empty()) {
10773 CodeGen.setAction(NoPrivAction);
10774 CodeGen(CGF);
10778 // We don't have to do anything to close the region if the if clause evaluates
10779 // to false.
10780 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10782 if (IfCond) {
10783 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10784 } else {
10785 RegionCodeGenTy RCG(BeginThenGen);
10786 RCG(CGF);
10789 // If we don't require privatization of device pointers, we emit the body in
10790 // between the runtime calls. This avoids duplicating the body code.
10791 if (Info.CaptureDeviceAddrMap.empty()) {
10792 CodeGen.setAction(NoPrivAction);
10793 CodeGen(CGF);
10796 if (IfCond) {
10797 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10798 } else {
10799 RegionCodeGenTy RCG(EndThenGen);
10800 RCG(CGF);
10804 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10805 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10806 const Expr *Device) {
10807 if (!CGF.HaveInsertPoint())
10808 return;
10810 assert((isa<OMPTargetEnterDataDirective>(D) ||
10811 isa<OMPTargetExitDataDirective>(D) ||
10812 isa<OMPTargetUpdateDirective>(D)) &&
10813 "Expecting either target enter, exit data, or update directives.");
10815 CodeGenFunction::OMPTargetDataInfo InputInfo;
10816 llvm::Value *MapTypesArray = nullptr;
10817 llvm::Value *MapNamesArray = nullptr;
10818 // Generate the code for the opening of the data environment.
10819 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10820 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10821 // Emit device ID if any.
10822 llvm::Value *DeviceID = nullptr;
10823 if (Device) {
10824 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10825 CGF.Int64Ty, /*isSigned=*/true);
10826 } else {
10827 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10830 // Emit the number of elements in the offloading arrays.
10831 llvm::Constant *PointerNum =
10832 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10834 // Source location for the ident struct
10835 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10837 llvm::Value *OffloadingArgs[] = {RTLoc,
10838 DeviceID,
10839 PointerNum,
10840 InputInfo.BasePointersArray.getPointer(),
10841 InputInfo.PointersArray.getPointer(),
10842 InputInfo.SizesArray.getPointer(),
10843 MapTypesArray,
10844 MapNamesArray,
10845 InputInfo.MappersArray.getPointer()};
10847 // Select the right runtime function call for each standalone
10848 // directive.
10849 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10850 RuntimeFunction RTLFn;
10851 switch (D.getDirectiveKind()) {
10852 case OMPD_target_enter_data:
10853 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10854 : OMPRTL___tgt_target_data_begin_mapper;
10855 break;
10856 case OMPD_target_exit_data:
10857 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10858 : OMPRTL___tgt_target_data_end_mapper;
10859 break;
10860 case OMPD_target_update:
10861 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10862 : OMPRTL___tgt_target_data_update_mapper;
10863 break;
10864 case OMPD_parallel:
10865 case OMPD_for:
10866 case OMPD_parallel_for:
10867 case OMPD_parallel_master:
10868 case OMPD_parallel_sections:
10869 case OMPD_for_simd:
10870 case OMPD_parallel_for_simd:
10871 case OMPD_cancel:
10872 case OMPD_cancellation_point:
10873 case OMPD_ordered:
10874 case OMPD_threadprivate:
10875 case OMPD_allocate:
10876 case OMPD_task:
10877 case OMPD_simd:
10878 case OMPD_tile:
10879 case OMPD_unroll:
10880 case OMPD_sections:
10881 case OMPD_section:
10882 case OMPD_single:
10883 case OMPD_master:
10884 case OMPD_critical:
10885 case OMPD_taskyield:
10886 case OMPD_barrier:
10887 case OMPD_taskwait:
10888 case OMPD_taskgroup:
10889 case OMPD_atomic:
10890 case OMPD_flush:
10891 case OMPD_depobj:
10892 case OMPD_scan:
10893 case OMPD_teams:
10894 case OMPD_target_data:
10895 case OMPD_distribute:
10896 case OMPD_distribute_simd:
10897 case OMPD_distribute_parallel_for:
10898 case OMPD_distribute_parallel_for_simd:
10899 case OMPD_teams_distribute:
10900 case OMPD_teams_distribute_simd:
10901 case OMPD_teams_distribute_parallel_for:
10902 case OMPD_teams_distribute_parallel_for_simd:
10903 case OMPD_declare_simd:
10904 case OMPD_declare_variant:
10905 case OMPD_begin_declare_variant:
10906 case OMPD_end_declare_variant:
10907 case OMPD_declare_target:
10908 case OMPD_end_declare_target:
10909 case OMPD_declare_reduction:
10910 case OMPD_declare_mapper:
10911 case OMPD_taskloop:
10912 case OMPD_taskloop_simd:
10913 case OMPD_master_taskloop:
10914 case OMPD_master_taskloop_simd:
10915 case OMPD_parallel_master_taskloop:
10916 case OMPD_parallel_master_taskloop_simd:
10917 case OMPD_target:
10918 case OMPD_target_simd:
10919 case OMPD_target_teams_distribute:
10920 case OMPD_target_teams_distribute_simd:
10921 case OMPD_target_teams_distribute_parallel_for:
10922 case OMPD_target_teams_distribute_parallel_for_simd:
10923 case OMPD_target_teams:
10924 case OMPD_target_parallel:
10925 case OMPD_target_parallel_for:
10926 case OMPD_target_parallel_for_simd:
10927 case OMPD_requires:
10928 case OMPD_metadirective:
10929 case OMPD_unknown:
10930 default:
10931 llvm_unreachable("Unexpected standalone target data directive.");
10932 break;
10934 CGF.EmitRuntimeCall(
10935 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10936 OffloadingArgs);
10939 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10940 &MapNamesArray](CodeGenFunction &CGF,
10941 PrePostActionTy &) {
10942 // Fill up the arrays with all the mapped variables.
10943 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10945 // Get map clause information.
10946 MappableExprsHandler MEHandler(D, CGF);
10947 MEHandler.generateAllInfo(CombinedInfo);
10949 CGOpenMPRuntime::TargetDataInfo Info;
10950 // Fill up the arrays and create the arguments.
10951 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10952 /*IsNonContiguous=*/true);
10953 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10954 D.hasClausesOfKind<OMPNowaitClause>();
10955 bool EmitDebug =
10956 CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
10957 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10958 EmitDebug,
10959 /*ForEndCall=*/false);
10960 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10961 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10962 CGF.VoidPtrTy, CGM.getPointerAlign());
10963 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10964 CGM.getPointerAlign());
10965 InputInfo.SizesArray =
10966 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10967 InputInfo.MappersArray =
10968 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10969 MapTypesArray = Info.RTArgs.MapTypesArray;
10970 MapNamesArray = Info.RTArgs.MapNamesArray;
10971 if (RequiresOuterTask)
10972 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10973 else
10974 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10977 if (IfCond) {
10978 emitIfClause(CGF, IfCond, TargetThenGen,
10979 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10980 } else {
10981 RegionCodeGenTy ThenRCG(TargetThenGen);
10982 ThenRCG(CGF);
10986 namespace {
10987 /// Kind of parameter in a function with 'declare simd' directive.
10988 enum ParamKindTy {
10989 Linear,
10990 LinearRef,
10991 LinearUVal,
10992 LinearVal,
10993 Uniform,
10994 Vector,
10996 /// Attribute set of the parameter.
10997 struct ParamAttrTy {
10998 ParamKindTy Kind = Vector;
10999 llvm::APSInt StrideOrArg;
11000 llvm::APSInt Alignment;
11001 bool HasVarStride = false;
11003 } // namespace
11005 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11006 ArrayRef<ParamAttrTy> ParamAttrs) {
11007 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11008 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11009 // of that clause. The VLEN value must be a power of 2.
11010 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11011 // is used to compute the vector length.
11012 // CDT is defined in the following order:
11013 // a) For non-void function, the CDT is the return type.
11014 // b) If the function has any non-uniform, non-linear parameters, then the
11015 // CDT is the type of the first such parameter.
11016 // c) If the CDT determined by a) or b) above is struct, union, or class
11017 // type which is pass-by-value (except for the type that maps to the
11018 // built-in complex data type), the characteristic data type is int.
11019 // d) If none of the above three cases is applicable, the CDT is int.
11020 // The VLEN is then determined based on the CDT and the size of vector
11021 // register of that ISA for which current vector version is generated. The
11022 // VLEN is computed using the formula below:
11023 // VLEN = sizeof(vector_register) / sizeof(CDT),
11024 // where the vector register size is specified in section 3.2.1 "Registers and
11025 // the Stack Frame" of the original AMD64 ABI document.
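// For example, assuming an AVX2 target (256-bit vector registers) and a CDT
// of 'float' (32 bits), VLEN = 256 / 32 = 8 lanes.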
11026 QualType RetType = FD->getReturnType();
11027 if (RetType.isNull())
11028 return 0;
11029 ASTContext &C = FD->getASTContext();
11030 QualType CDT;
11031 if (!RetType.isNull() && !RetType->isVoidType()) {
11032 CDT = RetType;
11033 } else {
11034 unsigned Offset = 0;
11035 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11036 if (ParamAttrs[Offset].Kind == Vector)
11037 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11038 ++Offset;
11040 if (CDT.isNull()) {
11041 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11042 if (ParamAttrs[I + Offset].Kind == Vector) {
11043 CDT = FD->getParamDecl(I)->getType();
11044 break;
11049 if (CDT.isNull())
11050 CDT = C.IntTy;
11051 CDT = CDT->getCanonicalTypeUnqualified();
11052 if (CDT->isRecordType() || CDT->isUnionType())
11053 CDT = C.IntTy;
11054 return C.getTypeSize(CDT);
11057 /// Mangle the parameter part of the vector function name according to
11058 /// their OpenMP classification. The mangling function is defined in
11059 /// section 4.5 of the AAVFABI(2021Q1).
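/// For example, a uniform parameter followed by a linear parameter with
/// constant step 2 mangles to "ul2"; the ISA/mask/VLEN prefix is added by
/// the callers below.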
11060 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11061 SmallString<256> Buffer;
11062 llvm::raw_svector_ostream Out(Buffer);
11063 for (const auto &ParamAttr : ParamAttrs) {
11064 switch (ParamAttr.Kind) {
11065 case Linear:
11066 Out << 'l';
11067 break;
11068 case LinearRef:
11069 Out << 'R';
11070 break;
11071 case LinearUVal:
11072 Out << 'U';
11073 break;
11074 case LinearVal:
11075 Out << 'L';
11076 break;
11077 case Uniform:
11078 Out << 'u';
11079 break;
11080 case Vector:
11081 Out << 'v';
11082 break;
11084 if (ParamAttr.HasVarStride)
11085 Out << "s" << ParamAttr.StrideOrArg;
11086 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11087 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11088 // Don't print the step value if it is not present or if it is
11089 // equal to 1.
11090 if (ParamAttr.StrideOrArg < 0)
11091 Out << 'n' << -ParamAttr.StrideOrArg;
11092 else if (ParamAttr.StrideOrArg != 1)
11093 Out << ParamAttr.StrideOrArg;
11096 if (!!ParamAttr.Alignment)
11097 Out << 'a' << ParamAttr.Alignment;
11100 return std::string(Out.str());
11103 static void
11104 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11105 const llvm::APSInt &VLENVal,
11106 ArrayRef<ParamAttrTy> ParamAttrs,
11107 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11108 struct ISADataTy {
11109 char ISA;
11110 unsigned VecRegSize;
11112 ISADataTy ISAData[] = {
11114 {'b', 128}, // SSE
11117 {'c', 256}, // AVX
11120 {'d', 256}, // AVX2
11123 {'e', 512}, // AVX512
11125 };
11126 llvm::SmallVector<char, 2> Masked;
11127 switch (State) {
11128 case OMPDeclareSimdDeclAttr::BS_Undefined:
11129 Masked.push_back('N');
11130 Masked.push_back('M');
11131 break;
11132 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11133 Masked.push_back('N');
11134 break;
11135 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11136 Masked.push_back('M');
11137 break;
11139 for (char Mask : Masked) {
11140 for (const ISADataTy &Data : ISAData) {
11141 SmallString<256> Buffer;
11142 llvm::raw_svector_ostream Out(Buffer);
11143 Out << "_ZGV" << Data.ISA << Mask;
11144 if (!VLENVal) {
11145 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11146 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11147 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11148 } else {
11149 Out << VLENVal;
11151 Out << mangleVectorParameters(ParamAttrs);
11152 Out << '_' << Fn->getName();
11153 Fn->addFnAttr(Out.str());
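// As an illustration (function name hypothetical): 'declare simd simdlen(4)
// notinbranch' on 'foo' with a single vector parameter yields, among others,
// the AVX2 variant attribute "_ZGVdN4v_foo".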
11158 // These are the functions needed to mangle the names of the
11159 // vector functions generated by the compiler, according to the rules
11160 // defined in the "Vector Function ABI specifications for AArch64",
11161 // available at
11162 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11164 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11165 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11166 QT = QT.getCanonicalType();
11168 if (QT->isVoidType())
11169 return false;
11171 if (Kind == ParamKindTy::Uniform)
11172 return false;
11174 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11175 return false;
11177 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11178 !QT->isReferenceType())
11179 return false;
11181 return true;
11184 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11185 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11186 QT = QT.getCanonicalType();
11187 unsigned Size = C.getTypeSize(QT);
11189 // Only scalars and complex types at most 16 bytes wide set PBV to true.
11190 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11191 return false;
11193 if (QT->isFloatingType())
11194 return true;
11196 if (QT->isIntegerType())
11197 return true;
11199 if (QT->isPointerType())
11200 return true;
11202 // TODO: Add support for complex types (section 3.1.2, item 2).
11204 return false;
11207 /// Computes the lane size (LS) of a return type or of an input parameter,
11208 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11209 /// TODO: Add support for references, section 3.2.1, item 1.
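/// For example, a uniform 'double *' parameter takes the size of its
/// pass-by-value pointee, LS(P) = 64, while a pointer to a non-PBV aggregate
/// falls back to the pointer width.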
11210 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11211 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11212 QualType PTy = QT.getCanonicalType()->getPointeeType();
11213 if (getAArch64PBV(PTy, C))
11214 return C.getTypeSize(PTy);
11216 if (getAArch64PBV(QT, C))
11217 return C.getTypeSize(QT);
11219 return C.getTypeSize(C.getUIntPtrType());
11222 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11223 // signature of the scalar function, as defined in 3.2.2 of the
11224 // AAVFABI.
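// For example, for 'double foo(float x, short y)' with both parameters
// classified as vector, the lane sizes are {64, 32, 16}, giving NDS = 16
// and WDS = 64.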
11225 static std::tuple<unsigned, unsigned, bool>
11226 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11227 QualType RetType = FD->getReturnType().getCanonicalType();
11229 ASTContext &C = FD->getASTContext();
11231 bool OutputBecomesInput = false;
11233 llvm::SmallVector<unsigned, 8> Sizes;
11234 if (!RetType->isVoidType()) {
11235 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11236 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11237 OutputBecomesInput = true;
11239 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11240 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11241 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11244 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11245 // The LS of a function parameter / return value can only be a power
11246 // of 2, starting from 8 bits, up to 128.
11247 assert(llvm::all_of(Sizes,
11248 [](unsigned Size) {
11249 return Size == 8 || Size == 16 || Size == 32 ||
11250 Size == 64 || Size == 128;
11251 }) &&
11252 "Invalid size");
11254 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11255 *std::max_element(std::begin(Sizes), std::end(Sizes)),
11256 OutputBecomesInput);
11259 // Function used to add the attribute. The parameter `VLEN` is
11260 // templated to allow the use of "x" when targeting scalable functions
11261 // for SVE.
11262 template <typename T>
11263 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11264 char ISA, StringRef ParSeq,
11265 StringRef MangledName, bool OutputBecomesInput,
11266 llvm::Function *Fn) {
11267 SmallString<256> Buffer;
11268 llvm::raw_svector_ostream Out(Buffer);
11269 Out << Prefix << ISA << LMask << VLEN;
11270 if (OutputBecomesInput)
11271 Out << "v";
11272 Out << ParSeq << "_" << MangledName;
11273 Fn->addFnAttr(Out.str());
11276 // Helper function to generate the Advanced SIMD names depending on
11277 // the value of the NDS when simdlen is not present.
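// For example, an NDS of 32 bits emits both a 2-lane (64-bit) and a 4-lane
// (128-bit) Advanced SIMD variant.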
11278 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11279 StringRef Prefix, char ISA,
11280 StringRef ParSeq, StringRef MangledName,
11281 bool OutputBecomesInput,
11282 llvm::Function *Fn) {
11283 switch (NDS) {
11284 case 8:
11285 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11286 OutputBecomesInput, Fn);
11287 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11288 OutputBecomesInput, Fn);
11289 break;
11290 case 16:
11291 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11292 OutputBecomesInput, Fn);
11293 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11294 OutputBecomesInput, Fn);
11295 break;
11296 case 32:
11297 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11298 OutputBecomesInput, Fn);
11299 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11300 OutputBecomesInput, Fn);
11301 break;
11302 case 64:
11303 case 128:
11304 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11305 OutputBecomesInput, Fn);
11306 break;
11307 default:
11308 llvm_unreachable("Scalar type is too wide.");
11309 }
11310 }
11312 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
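/// Illustrative end-to-end example (ours): for a C function
///   #pragma omp declare simd
///   double foo(double x);
/// with no `simdlen` clause, the Advanced SIMD path emits the attributes
/// "_ZGVnN2v_foo" and "_ZGVnM2v_foo", and the SVE path emits
/// "_ZGVsMxv_foo", assuming the respective target features are enabled.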
11313 static void emitAArch64DeclareSimdFunction(
11314 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11315 ArrayRef<ParamAttrTy> ParamAttrs,
11316 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11317 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11319 // Get basic data for building the vector signature.
11320 const auto Data = getNDSWDS(FD, ParamAttrs);
11321 const unsigned NDS = std::get<0>(Data);
11322 const unsigned WDS = std::get<1>(Data);
11323 const bool OutputBecomesInput = std::get<2>(Data);
11325 // Check the values provided via `simdlen` by the user.
11326 // 1. A `simdlen(1)` doesn't produce vector signatures.
11327 if (UserVLEN == 1) {
11328 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11329 DiagnosticsEngine::Warning,
11330 "The clause simdlen(1) has no effect when targeting aarch64.");
11331 CGM.getDiags().Report(SLoc, DiagID);
11332 return;
11333 }
11335 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11336 // Advanced SIMD output.
11337 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11338 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11339 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11340 "power of 2 when targeting Advanced SIMD.");
11341 CGM.getDiags().Report(SLoc, DiagID);
11342 return;
11343 }
11345 // 3. Section 3.4.1. SVE fixed length must obey the architectural
11346 // limits.
11347 if (ISA == 's' && UserVLEN != 0) {
11348 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11349 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11350 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11351 "lanes in the architectural constraints "
11352 "for SVE (min is 128-bit, max is "
11353 "2048-bit, by steps of 128-bit)");
11354 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11355 return;
11356 }
11357 }
11359 // Sort out parameter sequence.
11360 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
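// ParSeq is the AAVFABI parameter token sequence, e.g. "v" for a vector
// parameter, "u" for uniform, "l<step>" for linear (illustrative; see
// mangleVectorParameters earlier in this file).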
11361 StringRef Prefix = "_ZGV";
11362 // Generate simdlen from user input (if any).
11363 if (UserVLEN) {
11364 if (ISA == 's') {
11365 // SVE generates only a masked function.
11366 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11367 OutputBecomesInput, Fn);
11368 } else {
11369 assert(ISA == 'n' && "Expected ISA to be either 's' or 'n'.");
11370 // Advanced SIMD generates one or two functions, depending on
11371 // the `[not]inbranch` clause.
11372 switch (State) {
11373 case OMPDeclareSimdDeclAttr::BS_Undefined:
11374 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11375 OutputBecomesInput, Fn);
11376 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11377 OutputBecomesInput, Fn);
11378 break;
11379 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11380 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11381 OutputBecomesInput, Fn);
11382 break;
11383 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11384 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11385 OutputBecomesInput, Fn);
11386 break;
11387 }
11388 }
11389 } else {
11390 // If no user simdlen is provided, follow the AAVFABI rules for
11391 // generating the vector length.
11392 if (ISA == 's') {
11393 // SVE, section 3.4.1, item 1.
11394 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11395 OutputBecomesInput, Fn);
11396 } else {
11397 assert(ISA == 'n' && "Expected ISA to be either 's' or 'n'.");
11398 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11399 // two vector names depending on the use of the clause
11400 // `[not]inbranch`.
11401 switch (State) {
11402 case OMPDeclareSimdDeclAttr::BS_Undefined:
11403 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11404 OutputBecomesInput, Fn);
11405 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11406 OutputBecomesInput, Fn);
11407 break;
11408 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11409 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11410 OutputBecomesInput, Fn);
11411 break;
11412 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11413 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11414 OutputBecomesInput, Fn);
11415 break;
11416 }
11417 }
11418 }
11419 }
11421 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11422 llvm::Function *Fn) {
11423 ASTContext &C = CGM.getContext();
11424 FD = FD->getMostRecentDecl();
11425 while (FD) {
11426 // Map params to their positions in the function decl.
11427 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11428 if (isa<CXXMethodDecl>(FD))
11429 ParamPositions.try_emplace(FD, 0);
11430 unsigned ParamPos = ParamPositions.size();
11431 for (const ParmVarDecl *P : FD->parameters()) {
11432 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11433 ++ParamPos;
11434 }
11435 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11436 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11437 // Mark uniform parameters.
11438 for (const Expr *E : Attr->uniforms()) {
11439 E = E->IgnoreParenImpCasts();
11440 unsigned Pos;
11441 if (isa<CXXThisExpr>(E)) {
11442 Pos = ParamPositions[FD];
11443 } else {
11444 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11445 ->getCanonicalDecl();
11446 auto It = ParamPositions.find(PVD);
11447 assert(It != ParamPositions.end() && "Function parameter not found");
11448 Pos = It->second;
11449 }
11450 ParamAttrs[Pos].Kind = Uniform;
11451 }
11452 // Get alignment info.
11453 auto *NI = Attr->alignments_begin();
11454 for (const Expr *E : Attr->aligneds()) {
11455 E = E->IgnoreParenImpCasts();
11456 unsigned Pos;
11457 QualType ParmTy;
11458 if (isa<CXXThisExpr>(E)) {
11459 Pos = ParamPositions[FD];
11460 ParmTy = E->getType();
11461 } else {
11462 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11463 ->getCanonicalDecl();
11464 auto It = ParamPositions.find(PVD);
11465 assert(It != ParamPositions.end() && "Function parameter not found");
11466 Pos = It->second;
11467 ParmTy = PVD->getType();
11468 }
11469 ParamAttrs[Pos].Alignment =
11470 (*NI)
11471 ? (*NI)->EvaluateKnownConstInt(C)
11472 : llvm::APSInt::getUnsigned(
11473 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11474 .getQuantity());
11475 ++NI;
11476 }
11477 // Mark linear parameters.
11478 auto *SI = Attr->steps_begin();
11479 auto *MI = Attr->modifiers_begin();
11480 for (const Expr *E : Attr->linears()) {
11481 E = E->IgnoreParenImpCasts();
11482 unsigned Pos;
11483 bool IsReferenceType = false;
11484 // Rescaling factor needed to compute the linear parameter
11485 // value in the mangled name.
11486 unsigned PtrRescalingFactor = 1;
11487 if (isa<CXXThisExpr>(E)) {
11488 Pos = ParamPositions[FD];
11489 auto *P = cast<PointerType>(E->getType());
11490 PtrRescalingFactor = CGM.getContext()
11491 .getTypeSizeInChars(P->getPointeeType())
11492 .getQuantity();
11493 } else {
11494 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11495 ->getCanonicalDecl();
11496 auto It = ParamPositions.find(PVD);
11497 assert(It != ParamPositions.end() && "Function parameter not found");
11498 Pos = It->second;
11499 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11500 PtrRescalingFactor = CGM.getContext()
11501 .getTypeSizeInChars(P->getPointeeType())
11502 .getQuantity();
11503 else if (PVD->getType()->isReferenceType()) {
11504 IsReferenceType = true;
11505 PtrRescalingFactor =
11506 CGM.getContext()
11507 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11508 .getQuantity();
11509 }
11510 }
11511 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11512 if (*MI == OMPC_LINEAR_ref)
11513 ParamAttr.Kind = LinearRef;
11514 else if (*MI == OMPC_LINEAR_uval)
11515 ParamAttr.Kind = LinearUVal;
11516 else if (IsReferenceType)
11517 ParamAttr.Kind = LinearVal;
11518 else
11519 ParamAttr.Kind = Linear;
11520 // Assuming a stride of 1, for `linear` without modifiers.
11521 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11522 if (*SI) {
11523 Expr::EvalResult Result;
11524 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11525 if (const auto *DRE =
11526 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11527 if (const auto *StridePVD =
11528 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11529 ParamAttr.HasVarStride = true;
11530 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11531 assert(It != ParamPositions.end() &&
11532 "Function parameter not found");
11533 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11534 }
11535 }
11536 } else {
11537 ParamAttr.StrideOrArg = Result.Val.getInt();
11538 }
11539 }
11540 // If we are using a linear clause on a pointer, we need to
11541 // rescale the value of linear_step with the byte size of the
11542 // pointee type.
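// (For example, assuming 4-byte ints: `linear(p:2)` on an `int *p`
// parameter is mangled with a step of 8.)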
11543 if (!ParamAttr.HasVarStride &&
11544 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11545 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11546 ++SI;
11547 ++MI;
11548 }
11549 llvm::APSInt VLENVal;
11550 SourceLocation ExprLoc;
11551 const Expr *VLENExpr = Attr->getSimdlen();
11552 if (VLENExpr) {
11553 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11554 ExprLoc = VLENExpr->getExprLoc();
11555 }
11556 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11557 if (CGM.getTriple().isX86()) {
11558 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11559 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11560 unsigned VLEN = VLENVal.getExtValue();
11561 StringRef MangledName = Fn->getName();
11562 if (CGM.getTarget().hasFeature("sve"))
11563 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11564 MangledName, 's', 128, Fn, ExprLoc);
11565 if (CGM.getTarget().hasFeature("neon"))
11566 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11567 MangledName, 'n', 128, Fn, ExprLoc);
11568 }
11569 }
11570 FD = FD->getPreviousDecl();
11571 }
11572 }
11574 namespace {
11575 /// Cleanup action for doacross support.
11576 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11577 public:
11578 static const int DoacrossFinArgs = 2;
11580 private:
11581 llvm::FunctionCallee RTLFn;
11582 llvm::Value *Args[DoacrossFinArgs];
11584 public:
11585 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11586 ArrayRef<llvm::Value *> CallArgs)
11587 : RTLFn(RTLFn) {
11588 assert(CallArgs.size() == DoacrossFinArgs);
11589 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11590 }
11591 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11592 if (!CGF.HaveInsertPoint())
11593 return;
11594 CGF.EmitRuntimeCall(RTLFn, Args);
11595 }
11596 };
11597 } // namespace
11599 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11600 const OMPLoopDirective &D,
11601 ArrayRef<Expr *> NumIterations) {
11602 if (!CGF.HaveInsertPoint())
11603 return;
11605 ASTContext &C = CGM.getContext();
11606 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11607 RecordDecl *RD;
11608 if (KmpDimTy.isNull()) {
11609 // Build struct kmp_dim { // loop bounds info cast to kmp_int64
11610 // kmp_int64 lo; // lower
11611 // kmp_int64 up; // upper
11612 // kmp_int64 st; // stride
11613 // };
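// One kmp_dim entry is filled per loop dimension below; e.g. an
// `ordered(2)` loop nest produces a two-element array (illustrative).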
11614 RD = C.buildImplicitRecord("kmp_dim");
11615 RD->startDefinition();
11616 addFieldToRecordDecl(C, RD, Int64Ty);
11617 addFieldToRecordDecl(C, RD, Int64Ty);
11618 addFieldToRecordDecl(C, RD, Int64Ty);
11619 RD->completeDefinition();
11620 KmpDimTy = C.getRecordType(RD);
11621 } else {
11622 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11623 }
11624 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11625 QualType ArrayTy =
11626 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11628 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11629 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11630 enum { LowerFD = 0, UpperFD, StrideFD };
11631 // Fill dims with data.
11632 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11633 LValue DimsLVal = CGF.MakeAddrLValue(
11634 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11635 // dims.upper = num_iterations;
11636 LValue UpperLVal = CGF.EmitLValueForField(
11637 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11638 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11639 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11640 Int64Ty, NumIterations[I]->getExprLoc());
11641 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11642 // dims.stride = 1;
11643 LValue StrideLVal = CGF.EmitLValueForField(
11644 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11645 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11646 StrideLVal);
11647 }
11649 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11650 // kmp_int32 num_dims, struct kmp_dim * dims);
11651 llvm::Value *Args[] = {
11652 emitUpdateLocation(CGF, D.getBeginLoc()),
11653 getThreadID(CGF, D.getBeginLoc()),
11654 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11655 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11656 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11657 CGM.VoidPtrTy)};
11659 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11660 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11661 CGF.EmitRuntimeCall(RTLFn, Args);
11662 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11663 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11664 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11665 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11666 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11667 llvm::makeArrayRef(FiniArgs));
11668 }
11670 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11671 const OMPDependClause *C) {
11672 QualType Int64Ty =
11673 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11674 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11675 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11676 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11677 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11678 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11679 const Expr *CounterVal = C->getLoopData(I);
11680 assert(CounterVal);
11681 llvm::Value *CntVal = CGF.EmitScalarConversion(
11682 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11683 CounterVal->getExprLoc());
11684 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11685 /*Volatile=*/false, Int64Ty);
11686 }
11687 llvm::Value *Args[] = {
11688 emitUpdateLocation(CGF, C->getBeginLoc()),
11689 getThreadID(CGF, C->getBeginLoc()),
11690 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
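// depend(source) maps to __kmpc_doacross_post, depend(sink : vec) to
// __kmpc_doacross_wait, as dispatched below.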
11691 llvm::FunctionCallee RTLFn;
11692 if (C->getDependencyKind() == OMPC_DEPEND_source) {
11693 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11694 OMPRTL___kmpc_doacross_post);
11695 } else {
11696 assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11697 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11698 OMPRTL___kmpc_doacross_wait);
11699 }
11700 CGF.EmitRuntimeCall(RTLFn, Args);
11701 }
11703 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11704 llvm::FunctionCallee Callee,
11705 ArrayRef<llvm::Value *> Args) const {
11706 assert(Loc.isValid() && "Outlined function call location must be valid.");
11707 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11709 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11710 if (Fn->doesNotThrow()) {
11711 CGF.EmitNounwindRuntimeCall(Fn, Args);
11712 return;
11713 }
11714 }
11715 CGF.EmitRuntimeCall(Callee, Args);
11716 }
11718 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11719 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11720 ArrayRef<llvm::Value *> Args) const {
11721 emitCall(CGF, Loc, OutlinedFn, Args);
11722 }
11724 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11725 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11726 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11727 HasEmittedDeclareTargetRegion = true;
11728 }
11730 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11731 const VarDecl *NativeParam,
11732 const VarDecl *TargetParam) const {
11733 return CGF.GetAddrOfLocalVar(NativeParam);
11734 }
11736 /// Return allocator value from expression, or return a null allocator (default
11737 /// when no allocator specified).
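/// For example (illustrative): `allocate(omp_high_bw_mem_alloc : x)` yields
/// the allocator handle converted to `void *`, while a plain `allocate(x)`
/// yields a null pointer, i.e. the default allocator.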
11738 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11739 const Expr *Allocator) {
11740 llvm::Value *AllocVal;
11741 if (Allocator) {
11742 AllocVal = CGF.EmitScalarExpr(Allocator);
11743 // According to the standard, the original allocator type is an enum
11744 // (integer). Convert to pointer type, if required.
11745 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11746 CGF.getContext().VoidPtrTy,
11747 Allocator->getExprLoc());
11748 } else {
11749 // If no allocator specified, it defaults to the null allocator.
11750 AllocVal = llvm::Constant::getNullValue(
11751 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11752 }
11753 return AllocVal;
11754 }
11756 /// Return the alignment from an allocate directive if present.
11757 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11758 llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11760 if (!AllocateAlignment)
11761 return nullptr;
11763 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11764 }
11766 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11767 const VarDecl *VD) {
11768 if (!VD)
11769 return Address::invalid();
11770 Address UntiedAddr = Address::invalid();
11771 Address UntiedRealAddr = Address::invalid();
11772 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11773 if (It != FunctionToUntiedTaskStackMap.end()) {
11774 const UntiedLocalVarsAddressesMap &UntiedData =
11775 UntiedLocalVarsStack[It->second];
11776 auto I = UntiedData.find(VD);
11777 if (I != UntiedData.end()) {
11778 UntiedAddr = I->second.first;
11779 UntiedRealAddr = I->second.second;
11780 }
11781 }
11782 const VarDecl *CVD = VD->getCanonicalDecl();
11783 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11784 // Use the default allocation.
11785 if (!isAllocatableDecl(VD))
11786 return UntiedAddr;
11787 llvm::Value *Size;
11788 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11789 if (CVD->getType()->isVariablyModifiedType()) {
11790 Size = CGF.getTypeSize(CVD->getType());
11791 // Align the size: ((size + align - 1) / align) * align
11792 Size = CGF.Builder.CreateNUWAdd(
11793 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11794 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11795 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11796 } else {
11797 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11798 Size = CGM.getSize(Sz.alignTo(Align));
11799 }
11800 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11801 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11802 const Expr *Allocator = AA->getAllocator();
11803 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11804 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
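// Assemble the runtime call: __kmpc_aligned_alloc(gtid, alignment, size,
// allocator) when an alignment is requested, otherwise
// __kmpc_alloc(gtid, size, allocator).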
11805 SmallVector<llvm::Value *, 4> Args;
11806 Args.push_back(ThreadID);
11807 if (Alignment)
11808 Args.push_back(Alignment);
11809 Args.push_back(Size);
11810 Args.push_back(AllocVal);
11811 llvm::omp::RuntimeFunction FnID =
11812 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11813 llvm::Value *Addr = CGF.EmitRuntimeCall(
11814 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11815 getName({CVD->getName(), ".void.addr"}));
11816 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11817 CGM.getModule(), OMPRTL___kmpc_free);
11818 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11819 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11820 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11821 if (UntiedAddr.isValid())
11822 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11824 // Cleanup action for allocate support.
11825 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11826 llvm::FunctionCallee RTLFn;
11827 SourceLocation::UIntTy LocEncoding;
11828 Address Addr;
11829 const Expr *AllocExpr;
11831 public:
11832 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11833 SourceLocation::UIntTy LocEncoding, Address Addr,
11834 const Expr *AllocExpr)
11835 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11836 AllocExpr(AllocExpr) {}
11837 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11838 if (!CGF.HaveInsertPoint())
11839 return;
11840 llvm::Value *Args[3];
11841 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11842 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11843 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11844 Addr.getPointer(), CGF.VoidPtrTy);
11845 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11846 Args[2] = AllocVal;
11847 CGF.EmitRuntimeCall(RTLFn, Args);
11847 }
11848 };
11850 Address VDAddr =
11851 UntiedRealAddr.isValid()
11852 ? UntiedRealAddr
11853 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11854 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11855 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11856 VDAddr, Allocator);
11857 if (UntiedRealAddr.isValid())
11858 if (auto *Region =
11859 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11860 Region->emitUntiedSwitch(CGF);
11861 return VDAddr;
11862 }
11863 return UntiedAddr;
11864 }
11866 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11867 const VarDecl *VD) const {
11868 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11869 if (It == FunctionToUntiedTaskStackMap.end())
11870 return false;
11871 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11872 }
11874 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11875 CodeGenModule &CGM, const OMPLoopDirective &S)
11876 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11877 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11878 if (!NeedToPush)
11879 return;
11880 NontemporalDeclsSet &DS =
11881 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11882 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11883 for (const Stmt *Ref : C->private_refs()) {
11884 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11885 const ValueDecl *VD;
11886 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11887 VD = DRE->getDecl();
11888 } else {
11889 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11890 assert((ME->isImplicitCXXThis() ||
11891 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11892 "Expected member of current class.");
11893 VD = ME->getMemberDecl();
11895 DS.insert(VD);
11896 }
11897 }
11898 }
11900 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11901 if (!NeedToPush)
11902 return;
11903 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11904 }
11906 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11907 CodeGenFunction &CGF,
11908 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11909 std::pair<Address, Address>> &LocalVars)
11910 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11911 if (!NeedToPush)
11912 return;
11913 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11914 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11915 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11916 }
11918 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11919 if (!NeedToPush)
11920 return;
11921 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11922 }
11924 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11925 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11927 return llvm::any_of(
11928 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11929 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11930 }
11932 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11933 const OMPExecutableDirective &S,
11934 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11935 const {
11936 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11937 // Vars in target/task regions must be excluded completely.
11938 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11939 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11940 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11941 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11942 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11943 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11944 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11945 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11946 }
11947 }
11948 // Exclude vars in private clauses.
11949 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11950 for (const Expr *Ref : C->varlists()) {
11951 if (!Ref->getType()->isScalarType())
11952 continue;
11953 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11954 if (!DRE)
11955 continue;
11956 NeedToCheckForLPCs.insert(DRE->getDecl());
11957 }
11958 }
11959 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11960 for (const Expr *Ref : C->varlists()) {
11961 if (!Ref->getType()->isScalarType())
11962 continue;
11963 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11964 if (!DRE)
11965 continue;
11966 NeedToCheckForLPCs.insert(DRE->getDecl());
11967 }
11968 }
11969 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11970 for (const Expr *Ref : C->varlists()) {
11971 if (!Ref->getType()->isScalarType())
11972 continue;
11973 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11974 if (!DRE)
11975 continue;
11976 NeedToCheckForLPCs.insert(DRE->getDecl());
11977 }
11978 }
11979 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11980 for (const Expr *Ref : C->varlists()) {
11981 if (!Ref->getType()->isScalarType())
11982 continue;
11983 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11984 if (!DRE)
11985 continue;
11986 NeedToCheckForLPCs.insert(DRE->getDecl());
11987 }
11988 }
11989 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11990 for (const Expr *Ref : C->varlists()) {
11991 if (!Ref->getType()->isScalarType())
11992 continue;
11993 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11994 if (!DRE)
11995 continue;
11996 NeedToCheckForLPCs.insert(DRE->getDecl());
11997 }
11998 }
11999 for (const Decl *VD : NeedToCheckForLPCs) {
12000 for (const LastprivateConditionalData &Data :
12001 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12002 if (Data.DeclToUniqueName.count(VD) > 0) {
12003 if (!Data.Disabled)
12004 NeedToAddForLPCsAsDisabled.insert(VD);
12005 break;
12006 }
12007 }
12008 }
12009 }
12011 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12012 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12013 : CGM(CGF.CGM),
12014 Action((CGM.getLangOpts().OpenMP >= 50 &&
12015 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12016 [](const OMPLastprivateClause *C) {
12017 return C->getKind() ==
12018 OMPC_LASTPRIVATE_conditional;
12019 }))
12020 ? ActionToDo::PushAsLastprivateConditional
12021 : ActionToDo::DoNotPush) {
12022 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12023 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12024 return;
12025 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12026 "Expected a push action.");
12027 LastprivateConditionalData &Data =
12028 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12029 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12030 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12031 continue;
12033 for (const Expr *Ref : C->varlists()) {
12034 Data.DeclToUniqueName.insert(std::make_pair(
12035 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12036 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12037 }
12038 }
12039 Data.IVLVal = IVLVal;
12040 Data.Fn = CGF.CurFn;
12041 }
12043 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12044 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12045 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12046 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12047 if (CGM.getLangOpts().OpenMP < 50)
12048 return;
12049 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12050 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12051 if (!NeedToAddForLPCsAsDisabled.empty()) {
12052 Action = ActionToDo::DisableLastprivateConditional;
12053 LastprivateConditionalData &Data =
12054 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12055 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12056 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12057 Data.Fn = CGF.CurFn;
12058 Data.Disabled = true;
12059 }
12060 }
12062 CGOpenMPRuntime::LastprivateConditionalRAII
12063 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12064 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12065 return LastprivateConditionalRAII(CGF, S);
12066 }
12068 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12069 if (CGM.getLangOpts().OpenMP < 50)
12070 return;
12071 if (Action == ActionToDo::DisableLastprivateConditional) {
12072 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12073 "Expected list of disabled private vars.");
12074 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12075 }
12076 if (Action == ActionToDo::PushAsLastprivateConditional) {
12077 assert(
12078 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12079 "Expected list of lastprivate conditional vars.");
12080 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12081 }
12082 }
12084 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12085 const VarDecl *VD) {
12086 ASTContext &C = CGM.getContext();
12087 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12088 if (I == LastprivateConditionalToTypes.end())
12089 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12090 QualType NewType;
12091 const FieldDecl *VDField;
12092 const FieldDecl *FiredField;
12093 LValue BaseLVal;
12094 auto VI = I->getSecond().find(VD);
12095 if (VI == I->getSecond().end()) {
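// Lazily build a record equivalent to { <VD's type> value; char Fired; };
// the Fired flag records whether the privatized copy was written in the
// region.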
12096 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12097 RD->startDefinition();
12098 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12099 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12100 RD->completeDefinition();
12101 NewType = C.getRecordType(RD);
12102 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12103 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12104 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12105 } else {
12106 NewType = std::get<0>(VI->getSecond());
12107 VDField = std::get<1>(VI->getSecond());
12108 FiredField = std::get<2>(VI->getSecond());
12109 BaseLVal = std::get<3>(VI->getSecond());
12110 }
12111 LValue FiredLVal =
12112 CGF.EmitLValueForField(BaseLVal, FiredField);
12113 CGF.EmitStoreOfScalar(
12114 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12115 FiredLVal);
12116 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12117 }
12119 namespace {
12120 /// Checks if the lastprivate conditional variable is referenced in LHS.
12121 class LastprivateConditionalRefChecker final
12122 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12123 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12124 const Expr *FoundE = nullptr;
12125 const Decl *FoundD = nullptr;
12126 StringRef UniqueDeclName;
12127 LValue IVLVal;
12128 llvm::Function *FoundFn = nullptr;
12129 SourceLocation Loc;
12131 public:
12132 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12133 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12134 llvm::reverse(LPM)) {
12135 auto It = D.DeclToUniqueName.find(E->getDecl());
12136 if (It == D.DeclToUniqueName.end())
12137 continue;
12138 if (D.Disabled)
12139 return false;
12140 FoundE = E;
12141 FoundD = E->getDecl()->getCanonicalDecl();
12142 UniqueDeclName = It->second;
12143 IVLVal = D.IVLVal;
12144 FoundFn = D.Fn;
12145 break;
12146 }
12147 return FoundE == E;
12148 }
12149 bool VisitMemberExpr(const MemberExpr *E) {
12150 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12151 return false;
12152 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12153 llvm::reverse(LPM)) {
12154 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12155 if (It == D.DeclToUniqueName.end())
12156 continue;
12157 if (D.Disabled)
12158 return false;
12159 FoundE = E;
12160 FoundD = E->getMemberDecl()->getCanonicalDecl();
12161 UniqueDeclName = It->second;
12162 IVLVal = D.IVLVal;
12163 FoundFn = D.Fn;
12164 break;
12165 }
12166 return FoundE == E;
12167 }
12168 bool VisitStmt(const Stmt *S) {
12169 for (const Stmt *Child : S->children()) {
12170 if (!Child)
12171 continue;
12172 if (const auto *E = dyn_cast<Expr>(Child))
12173 if (!E->isGLValue())
12174 continue;
12175 if (Visit(Child))
12176 return true;
12177 }
12178 return false;
12179 }
12180 explicit LastprivateConditionalRefChecker(
12181 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12182 : LPM(LPM) {}
12183 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12184 getFoundData() const {
12185 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12186 }
12187 };
12188 } // namespace
12190 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12191 LValue IVLVal,
12192 StringRef UniqueDeclName,
12193 LValue LVal,
12194 SourceLocation Loc) {
12195 // Last updated loop counter for the lastprivate conditional var.
12196 // int<xx> last_iv = 0;
12197 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12198 llvm::Constant *LastIV =
12199 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12200 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12201 IVLVal.getAlignment().getAsAlign());
12202 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12204 // Last value of the lastprivate conditional.
12205 // decltype(priv_a) last_a;
12206 llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12207 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12208 Last->setAlignment(LVal.getAlignment().getAsAlign());
12209 LValue LastLVal = CGF.MakeAddrLValue(
12210 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12212 // Global loop counter. Required to handle inner parallel-for regions.
12213 // iv
12214 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12216 // #pragma omp critical(a)
12217 // if (last_iv <= iv) {
12218 // last_iv = iv;
12219 // last_a = priv_a;
12220 // }
12221 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12222 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12223 Action.Enter(CGF);
12224 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12225 // (last_iv <= iv) ? Check if the variable is updated and store new
12226 // value in global var.
12227 llvm::Value *CmpRes;
12228 if (IVLVal.getType()->isSignedIntegerType()) {
12229 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12230 } else {
12231 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12232 "Loop iteration variable must be integer.");
12233 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12234 }
12235 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12236 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12237 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12238 // {
12239 CGF.EmitBlock(ThenBB);
12241 // last_iv = iv;
12242 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12244 // last_a = priv_a;
12245 switch (CGF.getEvaluationKind(LVal.getType())) {
12246 case TEK_Scalar: {
12247 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12248 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12249 break;
12250 }
12251 case TEK_Complex: {
12252 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12253 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12254 break;
12255 }
12256 case TEK_Aggregate:
12257 llvm_unreachable(
12258 "Aggregates are not supported in lastprivate conditional.");
12259 }
12260 // }
12261 CGF.EmitBranch(ExitBB);
12262 // There is no need to emit a line number for the unconditional branch.
12263 (void)ApplyDebugLocation::CreateEmpty(CGF);
12264 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12265 };
12267 if (CGM.getLangOpts().OpenMPSimd) {
12268 // Do not emit as a critical region as no parallel region could be emitted.
12269 RegionCodeGenTy ThenRCG(CodeGen);
12270 ThenRCG(CGF);
12271 } else {
12272 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12273 }
12274 }
12276 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12277 const Expr *LHS) {
12278 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12279 return;
12280 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12281 if (!Checker.Visit(LHS))
12282 return;
12283 const Expr *FoundE;
12284 const Decl *FoundD;
12285 StringRef UniqueDeclName;
12286 LValue IVLVal;
12287 llvm::Function *FoundFn;
12288 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12289 Checker.getFoundData();
12290 if (FoundFn != CGF.CurFn) {
12291 // Special codegen for inner parallel regions.
12292 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12293 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12294 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12295 "Lastprivate conditional not found in outer region.");
12296 QualType StructTy = std::get<0>(It->getSecond());
12297 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12298 LValue PrivLVal = CGF.EmitLValue(FoundE);
12299 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12300 PrivLVal.getAddress(CGF),
12301 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12302 CGF.ConvertTypeForMem(StructTy));
12303 LValue BaseLVal =
12304 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12305 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12306 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12307 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12308 FiredLVal, llvm::AtomicOrdering::Unordered,
12309 /*IsVolatile=*/true, /*isInit=*/false);
12310 return;
12311 }
12313 // Private address of the lastprivate conditional in the current context.
12314 // priv_a
12315 LValue LVal = CGF.EmitLValue(FoundE);
12316 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12317 FoundE->getExprLoc());
12318 }
12320 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12321 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12322 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12323 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12324 return;
12325 auto Range = llvm::reverse(LastprivateConditionalStack);
12326 auto It = llvm::find_if(
12327 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12328 if (It == Range.end() || It->Fn != CGF.CurFn)
12329 return;
12330 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12331 assert(LPCI != LastprivateConditionalToTypes.end() &&
12332 "Lastprivates must be registered already.");
12333 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12334 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12335 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12336 for (const auto &Pair : It->DeclToUniqueName) {
12337 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12338 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12339 continue;
12340 auto I = LPCI->getSecond().find(Pair.first);
12341 assert(I != LPCI->getSecond().end() &&
12342 "Lastprivate must be registered already.");
12343 // bool Cmp = priv_a.Fired != 0;
12344 LValue BaseLVal = std::get<3>(I->getSecond());
12345 LValue FiredLVal =
12346 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12347 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12348 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12349 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12350 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12351 // if (Cmp) {
12352 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12353 CGF.EmitBlock(ThenBB);
12354 Address Addr = CGF.GetAddrOfLocalVar(VD);
12355 LValue LVal;
12356 if (VD->getType()->isReferenceType())
12357 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12358 AlignmentSource::Decl);
12359 else
12360 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12361 AlignmentSource::Decl);
12362 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12363 D.getBeginLoc());
12364 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12365 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12366 // }
12367 }
12368 }
12370 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12371 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12372 SourceLocation Loc) {
12373 if (CGF.getLangOpts().OpenMP < 50)
12374 return;
12375 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12376 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12377 "Unknown lastprivate conditional variable.");
12378 StringRef UniqueName = It->second;
12379 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12380 // The variable was not updated in the region - exit.
12381 if (!GV)
12382 return;
12383 LValue LPLVal = CGF.MakeAddrLValue(
12384 Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12385 PrivLVal.getType().getNonReferenceType());
12386 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12387 CGF.EmitStoreOfScalar(Res, PrivLVal);
12388 }
12390 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12391 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12392 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12393 llvm_unreachable("Not supported in SIMD-only mode");
12394 }
12396 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12397 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12398 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12399 llvm_unreachable("Not supported in SIMD-only mode");
12400 }
12402 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12403 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12404 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12405 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12406 bool Tied, unsigned &NumberOfParts) {
12407 llvm_unreachable("Not supported in SIMD-only mode");
12408 }
12410 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12411 SourceLocation Loc,
12412 llvm::Function *OutlinedFn,
12413 ArrayRef<llvm::Value *> CapturedVars,
12414 const Expr *IfCond,
12415 llvm::Value *NumThreads) {
12416 llvm_unreachable("Not supported in SIMD-only mode");
12417 }
12419 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12420 CodeGenFunction &CGF, StringRef CriticalName,
12421 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12422 const Expr *Hint) {
12423 llvm_unreachable("Not supported in SIMD-only mode");
12424 }
12426 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12427 const RegionCodeGenTy &MasterOpGen,
12428 SourceLocation Loc) {
12429 llvm_unreachable("Not supported in SIMD-only mode");
12430 }
12432 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12433 const RegionCodeGenTy &MasterOpGen,
12434 SourceLocation Loc,
12435 const Expr *Filter) {
12436 llvm_unreachable("Not supported in SIMD-only mode");
12437 }
12439 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12440 SourceLocation Loc) {
12441 llvm_unreachable("Not supported in SIMD-only mode");
12442 }
12444 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12445 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12446 SourceLocation Loc) {
12447 llvm_unreachable("Not supported in SIMD-only mode");
12448 }
12450 void CGOpenMPSIMDRuntime::emitSingleRegion(
12451 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12452 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12453 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12454 ArrayRef<const Expr *> AssignmentOps) {
12455 llvm_unreachable("Not supported in SIMD-only mode");
12456 }
12458 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12459 const RegionCodeGenTy &OrderedOpGen,
12460 SourceLocation Loc,
12461 bool IsThreads) {
12462 llvm_unreachable("Not supported in SIMD-only mode");
12463 }
12465 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12466 SourceLocation Loc,
12467 OpenMPDirectiveKind Kind,
12468 bool EmitChecks,
12469 bool ForceSimpleCall) {
12470 llvm_unreachable("Not supported in SIMD-only mode");
12471 }
12473 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12474 CodeGenFunction &CGF, SourceLocation Loc,
12475 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12476 bool Ordered, const DispatchRTInput &DispatchValues) {
12477 llvm_unreachable("Not supported in SIMD-only mode");
12478 }
12480 void CGOpenMPSIMDRuntime::emitForStaticInit(
12481 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12482 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12483 llvm_unreachable("Not supported in SIMD-only mode");
12484 }
12486 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12487 CodeGenFunction &CGF, SourceLocation Loc,
12488 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12489 llvm_unreachable("Not supported in SIMD-only mode");
12490 }
12492 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12493 SourceLocation Loc,
12494 unsigned IVSize,
12495 bool IVSigned) {
12496 llvm_unreachable("Not supported in SIMD-only mode");
12497 }
12499 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12500 SourceLocation Loc,
12501 OpenMPDirectiveKind DKind) {
12502 llvm_unreachable("Not supported in SIMD-only mode");
12503 }
12505 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12506 SourceLocation Loc,
12507 unsigned IVSize, bool IVSigned,
12508 Address IL, Address LB,
12509 Address UB, Address ST) {
12510 llvm_unreachable("Not supported in SIMD-only mode");
12511 }
12513 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12514 llvm::Value *NumThreads,
12515 SourceLocation Loc) {
12516 llvm_unreachable("Not supported in SIMD-only mode");
12517 }
12519 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12520 ProcBindKind ProcBind,
12521 SourceLocation Loc) {
12522 llvm_unreachable("Not supported in SIMD-only mode");
12523 }
12525 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12526 const VarDecl *VD,
12527 Address VDAddr,
12528 SourceLocation Loc) {
12529 llvm_unreachable("Not supported in SIMD-only mode");
12530 }
12532 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12533 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12534 CodeGenFunction *CGF) {
12535 llvm_unreachable("Not supported in SIMD-only mode");
12536 }
12538 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12539 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12540 llvm_unreachable("Not supported in SIMD-only mode");
12541 }
12543 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12544 ArrayRef<const Expr *> Vars,
12545 SourceLocation Loc,
12546 llvm::AtomicOrdering AO) {
12547 llvm_unreachable("Not supported in SIMD-only mode");
12548 }
12550 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12551 const OMPExecutableDirective &D,
12552 llvm::Function *TaskFunction,
12553 QualType SharedsTy, Address Shareds,
12554 const Expr *IfCond,
12555 const OMPTaskDataTy &Data) {
12556 llvm_unreachable("Not supported in SIMD-only mode");
12557 }
12559 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12560 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12561 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12562 const Expr *IfCond, const OMPTaskDataTy &Data) {
12563 llvm_unreachable("Not supported in SIMD-only mode");
12564 }
12566 void CGOpenMPSIMDRuntime::emitReduction(
12567 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12568 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12569 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12570 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12571 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12572 ReductionOps, Options);
12573 }
12575 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12576 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12577 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12578 llvm_unreachable("Not supported in SIMD-only mode");
12579 }
12581 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12582 SourceLocation Loc,
12583 bool IsWorksharingReduction) {
12584 llvm_unreachable("Not supported in SIMD-only mode");
12585 }
12587 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12588 SourceLocation Loc,
12589 ReductionCodeGen &RCG,
12590 unsigned N) {
12591 llvm_unreachable("Not supported in SIMD-only mode");
12592 }
12594 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12595 SourceLocation Loc,
12596 llvm::Value *ReductionsPtr,
12597 LValue SharedLVal) {
12598 llvm_unreachable("Not supported in SIMD-only mode");
12599 }
12601 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12602 SourceLocation Loc,
12603 const OMPTaskDataTy &Data) {
12604 llvm_unreachable("Not supported in SIMD-only mode");
12605 }
12607 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12608 CodeGenFunction &CGF, SourceLocation Loc,
12609 OpenMPDirectiveKind CancelRegion) {
12610 llvm_unreachable("Not supported in SIMD-only mode");
12611 }
12613 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12614 SourceLocation Loc, const Expr *IfCond,
12615 OpenMPDirectiveKind CancelRegion) {
12616 llvm_unreachable("Not supported in SIMD-only mode");
12617 }
12619 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12620 const OMPExecutableDirective &D, StringRef ParentName,
12621 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12622 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12623 llvm_unreachable("Not supported in SIMD-only mode");
12624 }
12626 void CGOpenMPSIMDRuntime::emitTargetCall(
12627 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12628 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12629 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12630 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12631 const OMPLoopDirective &D)>
12632 SizeEmitter) {
12633 llvm_unreachable("Not supported in SIMD-only mode");
12634 }
12636 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12637 llvm_unreachable("Not supported in SIMD-only mode");
12638 }
12640 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12641 llvm_unreachable("Not supported in SIMD-only mode");
12642 }
12644 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12645 return false;
12646 }
12648 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12649 const OMPExecutableDirective &D,
12650 SourceLocation Loc,
12651 llvm::Function *OutlinedFn,
12652 ArrayRef<llvm::Value *> CapturedVars) {
12653 llvm_unreachable("Not supported in SIMD-only mode");
12654 }
12656 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12657 const Expr *NumTeams,
12658 const Expr *ThreadLimit,
12659 SourceLocation Loc) {
12660 llvm_unreachable("Not supported in SIMD-only mode");
12661 }
12663 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12664 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12665 const Expr *Device, const RegionCodeGenTy &CodeGen,
12666 CGOpenMPRuntime::TargetDataInfo &Info) {
12667 llvm_unreachable("Not supported in SIMD-only mode");
12668 }
12670 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12671 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12672 const Expr *Device) {
12673 llvm_unreachable("Not supported in SIMD-only mode");
12674 }
12676 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12677 const OMPLoopDirective &D,
12678 ArrayRef<Expr *> NumIterations) {
12679 llvm_unreachable("Not supported in SIMD-only mode");
12680 }
12682 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12683 const OMPDependClause *C) {
12684 llvm_unreachable("Not supported in SIMD-only mode");
12685 }
12687 const VarDecl *
12688 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12689 const VarDecl *NativeParam) const {
12690 llvm_unreachable("Not supported in SIMD-only mode");
12691 }
12693 Address
12694 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12695 const VarDecl *NativeParam,
12696 const VarDecl *TargetParam) const {
12697 llvm_unreachable("Not supported in SIMD-only mode");