[clang] Handle __declspec() attributes in using
[llvm-project.git] / clang / lib / CodeGen / CGStmtOpenMP.cpp
blob 6bc30ad0302e5c8aff5affcda98765c9aa4dc3a6
1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit OpenMP nodes as LLVM code.
11 //===----------------------------------------------------------------------===//
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/BinaryFormat/Dwarf.h"
29 #include "llvm/Frontend/OpenMP/OMPConstants.h"
30 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DebugInfoMetadata.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Metadata.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include <optional>
38 using namespace clang;
39 using namespace CodeGen;
40 using namespace llvm::omp;
// Forward declaration only; the definition is not part of the code shown here.
// OMPSimdLexicalScope below calls it on use_device_addr list items.
42 static const VarDecl *getBaseDecl(const Expr *Ref);
44 namespace {
45 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
46 /// for captured expressions.
// On construction it optionally emits the clauses' pre-init declarations and,
// if a captured region is named, registers in InlinedShareds the address of
// every variable captured by that region.
47 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
// Emits the declarations of the pre-init statement of each clause of S that
// has one (as reported by OMPClauseWithPreInit::get).
48 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
49 for (const auto *C : S.clauses()) {
50 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
51 if (const auto *PreInit =
52 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
53 for (const auto *I : PreInit->decls()) {
54 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
55 CGF.EmitVarDecl(cast<VarDecl>(*I));
56 } else {
// Declarations carrying OMPCaptureNoInitAttr only get their alloca and
// cleanups emitted; EmitVarDecl is not called for them.
57 CodeGenFunction::AutoVarEmission Emission =
58 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
59 CGF.EmitAutoVarCleanups(Emission);
// Private scope that holds the addresses registered by the constructor.
66 CodeGenFunction::OMPPrivateScope InlinedShareds;
// Returns true if VD is found among the lambda capture fields, in the current
// CapturedStmtInfo, or is captured by the BlockDecl currently being emitted.
68 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
69 return CGF.LambdaCaptureFields.lookup(VD) ||
70 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
71 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
72 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
75 public:
// CapturedRegion: directive kind selecting which captured statement of S is
// processed; when empty, only the pre-init emission (if enabled) happens.
// EmitPreInitStmt: whether emitPreInitStmt() is run for S.
76 OMPLexicalScope(
77 CodeGenFunction &CGF, const OMPExecutableDirective &S,
78 const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
79 const bool EmitPreInitStmt = true)
80 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
81 InlinedShareds(CGF) {
82 if (EmitPreInitStmt)
83 emitPreInitStmt(CGF, S);
84 if (!CapturedRegion)
85 return;
86 assert(S.hasAssociatedStmt() &&
87 "Expected associated statement for inlined directive.");
88 const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
89 for (const auto &C : CS->captures()) {
// Only variable captures (by reference or by copy) are remapped.
90 if (C.capturesVariable() || C.capturesVariableByCopy()) {
91 auto *VD = C.getCapturedVar();
92 assert(VD == VD->getCanonicalDecl() &&
93 "Canonical decl must be captured.");
// Build a temporary reference to VD, flagged as referring to an enclosing
// capture when isCapturedVar() says so or when InlinedShareds reports VD as a
// captured global, and register the address that reference evaluates to.
94 DeclRefExpr DRE(
95 CGF.getContext(), const_cast<VarDecl *>(VD),
96 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
97 InlinedShareds.isGlobalVarCaptured(VD)),
98 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
99 InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
// Activate all registered addresses at once; the result is deliberately
// ignored.
102 (void)InlinedShareds.Privatize();
106 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
107 /// for captured expressions.
108 class OMPParallelScope final : public OMPLexicalScope {
109 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
110 OpenMPDirectiveKind Kind = S.getDirectiveKind();
111 return !(isOpenMPTargetExecutionDirective(Kind) ||
112 isOpenMPLoopBoundSharingDirective(Kind)) &&
113 isOpenMPParallelDirective(Kind);
116 public:
117 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
118 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
119 EmitPreInitStmt(S)) {}
122 /// Lexical scope for OpenMP teams construct, that handles correct codegen
123 /// for captured expressions.
124 class OMPTeamsScope final : public OMPLexicalScope {
125 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
126 OpenMPDirectiveKind Kind = S.getDirectiveKind();
127 return !isOpenMPTargetExecutionDirective(Kind) &&
128 isOpenMPTeamsDirective(Kind);
131 public:
132 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
133 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
134 EmitPreInitStmt(S)) {}
137 /// Private scope for OpenMP loop-based directives, that supports capturing
138 /// of used expression from loop statement.
139 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
// Emits the pre-init declarations of the loop-based directive S. For an
// OMPLoopDirective the loop counters and private-clause variables are first
// given temporary addresses (PreCondVars); that mapping stays applied while
// the range-for helper statements and the pre-inits are emitted and is
// restored at the end of this function.
140 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
// Assigned on every path that reaches its use; the remaining path is
// llvm_unreachable.
141 const DeclStmt *PreInits;
142 CodeGenFunction::OMPMapVars PreCondVars;
143 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
// Canonical decls that already received a temporary address, so a variable
// that is both a counter and listed in a private clause is mapped only once.
144 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
// Each loop counter is mapped to a fresh memory temporary of its
// non-reference type.
145 for (const auto *E : LD->counters()) {
146 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
147 EmittedAsPrivate.insert(VD->getCanonicalDecl());
148 (void)PreCondVars.setVarAddr(
149 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
151 // Mark private vars as undefs.
152 for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
153 for (const Expr *IRef : C->varlists()) {
154 const auto *OrigVD =
155 cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
156 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
157 QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
// The address is built from an undef pointer value, paired with the
// variable's memory type and declared alignment.
158 (void)PreCondVars.setVarAddr(
159 CGF, OrigVD,
160 Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
161 CGF.getContext().getPointerType(OrigVDTy))),
162 CGF.ConvertTypeForMem(OrigVDTy),
163 CGF.getContext().getDeclAlign(OrigVD)));
167 (void)PreCondVars.apply(CGF);
168 // Emit init, __range and __end variables for C++ range loops.
169 (void)OMPLoopBasedDirective::doForAllLoops(
170 LD->getInnermostCapturedStmt()->getCapturedStmt(),
171 /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
172 [&CGF](unsigned Cnt, const Stmt *CurStmt) {
// Only range-based for statements need anything emitted here; the callback
// returns false in every case.
173 if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
174 if (const Stmt *Init = CXXFor->getInit())
175 CGF.EmitStmt(Init);
176 CGF.EmitStmt(CXXFor->getRangeStmt());
177 CGF.EmitStmt(CXXFor->getEndStmt());
179 return false;
181 PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
// Tile and unroll directives contribute only their pre-init statement.
182 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
183 PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
184 } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
185 PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
186 } else {
187 llvm_unreachable("Unknown loop-based directive kind.");
// PreInits may be null (cast_or_null above); then nothing is emitted.
189 if (PreInits) {
190 for (const auto *I : PreInits->decls())
191 CGF.EmitVarDecl(cast<VarDecl>(*I));
// Undo the temporary address mapping set up above.
193 PreCondVars.restore(CGF);
196 public:
// Opens a cleanups scope on CGF and immediately emits the pre-inits of S.
197 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
198 : CodeGenFunction::RunCleanupsScope(CGF) {
199 emitPreInitStmt(CGF, S);
/// Lexical scope that, on construction, emits clause pre-inits and
/// captured-expression declarations for a directive and registers the
/// addresses of the variables captured by its nested captured statements.
/// NOTE(review): judging by the name and the "simd only mode" comment below
/// this serves simd-only OpenMP codegen -- confirm against the callers.
203 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
// Private scope that holds the addresses registered by the constructor.
204 CodeGenFunction::OMPPrivateScope InlinedShareds;
// Returns true if VD is found among the lambda capture fields, in the current
// CapturedStmtInfo, or is captured by the BlockDecl currently being emitted.
206 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
207 return CGF.LambdaCaptureFields.lookup(VD) ||
208 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
209 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
210 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
213 public:
214 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
215 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
216 InlinedShareds(CGF) {
// Step 1: per clause, emit pre-init declarations, or the captured-expression
// declarations behind use_device_ptr / use_device_addr list items.
217 for (const auto *C : S.clauses()) {
218 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
219 if (const auto *PreInit =
220 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
221 for (const auto *I : PreInit->decls()) {
222 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
223 CGF.EmitVarDecl(cast<VarDecl>(*I));
224 } else {
// Declarations carrying OMPCaptureNoInitAttr only get their alloca and
// cleanups emitted; EmitVarDecl is not called for them.
225 CodeGenFunction::AutoVarEmission Emission =
226 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
227 CGF.EmitAutoVarCleanups(Emission);
231 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
232 for (const Expr *E : UDP->varlists()) {
233 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
234 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
235 CGF.EmitVarDecl(*OED);
237 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
238 for (const Expr *E : UDP->varlists()) {
// Unlike use_device_ptr, the list item is resolved through getBaseDecl()
// rather than cast directly to a DeclRefExpr.
239 const Decl *D = getBaseDecl(E);
240 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
241 CGF.EmitVarDecl(*OED);
// Step 2: private clauses are emitted here only for non-simd directives.
245 if (!isOpenMPSimdDirective(S.getDirectiveKind()))
246 CGF.EmitOMPPrivateClause(S, InlinedShareds);
// Step 3: a taskgroup directive with a reduction reference gets that
// variable emitted.
247 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
248 if (const Expr *E = TG->getReductionRef())
249 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
251 // Temp copy arrays for inscan reductions should not be emitted as they are
252 // not used in simd only mode.
253 llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
254 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
255 if (C->getModifier() != OMPC_REDUCTION_inscan)
256 continue;
257 for (const Expr *E : C->copy_array_temps())
258 CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
// Step 4: walk the chain of directly nested captured statements, starting at
// the associated statement (which may be absent), and register an address for
// every captured variable that is not one of the copy-array temporaries.
260 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
261 while (CS) {
262 for (auto &C : CS->captures()) {
263 if (C.capturesVariable() || C.capturesVariableByCopy()) {
264 auto *VD = C.getCapturedVar();
265 if (CopyArrayTemps.contains(VD))
266 continue;
267 assert(VD == VD->getCanonicalDecl() &&
268 "Canonical decl must be captured.");
// Build a temporary reference to VD, flagged as referring to an enclosing
// capture when isCapturedVar() says so or when InlinedShareds reports VD as a
// captured global, and register the address that reference evaluates to.
269 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
270 isCapturedVar(CGF, VD) ||
271 (CGF.CapturedStmtInfo &&
272 InlinedShareds.isGlobalVarCaptured(VD)),
273 VD->getType().getNonReferenceType(), VK_LValue,
274 C.getLocation());
275 InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
// Descend only while the captured body is itself a CapturedStmt.
278 CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
// Activate all registered addresses at once; the result is deliberately
// ignored.
280 (void)InlinedShareds.Privatize();
284 } // namespace
// Forward declaration only; neither the definition nor a call appears in the
// code shown here.
286 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
287 const OMPExecutableDirective &S,
288 const RegionCodeGenTy &CodeGen);
290 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
291 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
292 if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
293 OrigVD = OrigVD->getCanonicalDecl();
294 bool IsCaptured =
295 LambdaCaptureFields.lookup(OrigVD) ||
296 (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
297 (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
298 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
299 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
300 return EmitLValue(&DRE);
303 return EmitLValue(E);
306 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
307 ASTContext &C = getContext();
308 llvm::Value *Size = nullptr;
309 auto SizeInChars = C.getTypeSizeInChars(Ty);
310 if (SizeInChars.isZero()) {
311 // getTypeSizeInChars() returns 0 for a VLA.
312 while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
313 VlaSizePair VlaSize = getVLASize(VAT);
314 Ty = VlaSize.Type;
315 Size =
316 Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
318 SizeInChars = C.getTypeSizeInChars(Ty);
319 if (SizeInChars.isZero())
320 return llvm::ConstantInt::get(SizeTy, /*V=*/0);
321 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
323 return CGM.getSize(SizeInChars);
/// Appends to \p CapturedVars one llvm::Value per capture of \p S, in capture
/// order: the VLA size, the 'this' value, the (possibly uintptr-repacked)
/// value of a by-copy capture, or the address of a by-reference capture.
326 void CodeGenFunction::GenerateOpenMPCapturedVars(
327 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
328 const RecordDecl *RD = S.getCapturedRecordDecl();
// Three sequences are walked in lockstep: the capture initializer
// expressions (I), the fields of the captured record (CurField) and the
// captures themselves (CurCap).
329 auto CurField = RD->field_begin();
330 auto CurCap = S.captures().begin();
331 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
332 E = S.capture_init_end();
333 I != E; ++I, ++CurField, ++CurCap) {
334 if (CurField->hasCapturedVLAType()) {
// VLA size capture: pass the value recorded in VLASizeMap for the size
// expression.
335 const VariableArrayType *VAT = CurField->getCapturedVLAType();
336 llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
337 CapturedVars.push_back(Val);
338 } else if (CurCap->capturesThis()) {
339 CapturedVars.push_back(CXXThisValue);
340 } else if (CurCap->capturesVariableByCopy()) {
341 llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
343 // If the field is not a pointer, we need to save the actual value
344 // and load it as a void pointer.
345 if (!CurField->getType()->isAnyPointerType()) {
346 ASTContext &Ctx = getContext();
// Temporary of uintptr type, named "<var>.casted"; the value is stored into
// it through the field's own type and then read back as uintptr.
347 Address DstAddr = CreateMemTemp(
348 Ctx.getUIntPtrType(),
349 Twine(CurCap->getCapturedVar()->getName(), ".casted"));
350 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
352 llvm::Value *SrcAddrVal = EmitScalarConversion(
353 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
354 Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
355 LValue SrcLV =
356 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
358 // Store the value using the source type pointer.
359 EmitStoreThroughLValue(RValue::get(CV), SrcLV);
361 // Load the value using the destination type pointer.
362 CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
364 CapturedVars.push_back(CV);
365 } else {
// By-reference capture: pass the pointer to the captured variable.
366 assert(CurCap->capturesVariable() && "Expected capture by reference.");
367 CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
372 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
373 QualType DstType, StringRef Name,
374 LValue AddrLV) {
375 ASTContext &Ctx = CGF.getContext();
377 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
378 AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
379 Ctx.getPointerType(DstType), Loc);
380 Address TmpAddr =
381 CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
382 return TmpAddr;
385 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
386 if (T->isLValueReferenceType())
387 return C.getLValueReferenceType(
388 getCanonicalParamType(C, T.getNonReferenceType()),
389 /*SpelledAsLValue=*/false);
390 if (T->isPointerType())
391 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
392 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
393 if (const auto *VLA = dyn_cast<VariableArrayType>(A))
394 return getCanonicalParamType(C, VLA->getElementType());
395 if (!A->isVariablyModifiedType())
396 return C.getCanonicalType(T);
398 return C.getCanonicalParamType(T);
401 namespace {
402 /// Contains required data for proper outlined function codegen.
403 struct FunctionOptions {
404 /// Captured statement for which the function is generated.
405 const CapturedStmt *S = nullptr;
406 /// true if cast to/from UIntPtr is required for variables captured by
407 /// value.
408 const bool UIntPtrCastRequired = true;
409 /// true if only casted arguments must be registered as local args or VLA
410 /// sizes.
411 const bool RegisterCastedArgsOnly = false;
412 /// Name of the generated function.
413 const StringRef FunctionName;
414 /// Location of the non-debug version of the outlined function.
415 SourceLocation Loc;
416 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
417 bool RegisterCastedArgsOnly, StringRef FunctionName,
418 SourceLocation Loc)
419 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
420 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
421 FunctionName(FunctionName), Loc(Loc) {}
423 } // namespace
/// Creates the llvm::Function for the captured statement FO.S, starts its
/// emission on \p CGF and maps every captured entity to an incoming argument.
///
/// \param CGF           Function state on which StartFunction() is called.
/// \param Args          Receives the parameter declarations: the captured
///                      decl's parameters before the context parameter, one
///                      parameter per captured field, then the parameters
///                      after the context parameter.
/// \param LocalAddrs    Receives, per argument, the captured variable
///                      (nullptr for 'this') and the address to use for it.
/// \param VLASizes      Receives, per argument holding a captured VLA size,
///                      the size expression and the loaded size value.
/// \param CXXThisValue  Reset to nullptr, then set to the loaded 'this' value
///                      if 'this' is captured.
/// \param FO            Options controlling casting, registration, naming and
///                      locations.
/// \returns The newly created function (internal linkage).
425 static llvm::Function *emitOutlinedFunctionPrologue(
426 CodeGenFunction &CGF, FunctionArgList &Args,
427 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
428 &LocalAddrs,
429 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
430 &VLASizes,
431 llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
432 const CapturedDecl *CD = FO.S->getCapturedDecl();
433 const RecordDecl *RD = FO.S->getCapturedRecordDecl();
434 assert(CD->hasBody() && "missing CapturedDecl body");
436 CXXThisValue = nullptr;
437 // Build the argument list.
438 CodeGenModule &CGM = CGF.CGM;
439 ASTContext &Ctx = CGM.getContext();
// TargetArgs mirrors Args position by position; it is the list actually used
// to arrange and start the function and may hold runtime-translated
// parameters (see translateParameter below).
440 FunctionArgList TargetArgs;
// Parameters that precede the context parameter are taken over unchanged.
441 Args.append(CD->param_begin(),
442 std::next(CD->param_begin(), CD->getContextParamPosition()));
443 TargetArgs.append(
444 CD->param_begin(),
445 std::next(CD->param_begin(), CD->getContextParamPosition()));
// I walks the captures in step with the record fields in the loop below.
446 auto I = FO.S->captures().begin();
// Only created when no uintptr casting is requested; it then serves as the
// declaration context of the ParmVarDecls built for captured variables and
// 'this'.
447 FunctionDecl *DebugFunctionDecl = nullptr;
448 if (!FO.UIntPtrCastRequired) {
449 FunctionProtoType::ExtProtoInfo EPI;
450 QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
451 DebugFunctionDecl = FunctionDecl::Create(
452 Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
453 SourceLocation(), DeclarationName(), FunctionTy,
454 Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
455 /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
456 /*hasWrittenPrototype=*/false);
// First pass over the captured fields: create one parameter per field.
458 for (const FieldDecl *FD : RD->fields()) {
459 QualType ArgType = FD->getType();
460 IdentifierInfo *II = nullptr;
461 VarDecl *CapVar = nullptr;
463 // If this is a capture by copy and the type is not a pointer, the outlined
464 // function argument type should be uintptr and the value properly casted to
465 // uintptr. This is necessary given that the runtime library is only able to
466 // deal with pointers. We can pass in the same way the VLA type sizes to the
467 // outlined function.
468 if (FO.UIntPtrCastRequired &&
469 ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
470 I->capturesVariableArrayType()))
471 ArgType = Ctx.getUIntPtrType();
// Pick the parameter name: the captured variable's identifier, "this", or
// "vla" for a captured VLA size.
473 if (I->capturesVariable() || I->capturesVariableByCopy()) {
474 CapVar = I->getCapturedVar();
475 II = CapVar->getIdentifier();
476 } else if (I->capturesThis()) {
477 II = &Ctx.Idents.get("this");
478 } else {
479 assert(I->capturesVariableArrayType());
480 II = &Ctx.Idents.get("vla");
482 if (ArgType->isVariablyModifiedType())
483 ArgType = getCanonicalParamType(Ctx, ArgType);
// Three kinds of parameter declaration: a ThreadPrivateVar implicit
// parameter for thread-local captured variables, a ParmVarDecl owned by
// DebugFunctionDecl when that exists and the capture is a variable or
// 'this', and a plain implicit parameter otherwise.
484 VarDecl *Arg;
485 if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
486 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
487 II, ArgType,
488 ImplicitParamDecl::ThreadPrivateVar);
489 } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
490 Arg = ParmVarDecl::Create(
491 Ctx, DebugFunctionDecl,
492 CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
493 CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
494 /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
495 } else {
496 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
497 II, ArgType, ImplicitParamDecl::Other);
499 Args.emplace_back(Arg);
500 // Do not cast arguments if we emit function with non-original types.
501 TargetArgs.emplace_back(
502 FO.UIntPtrCastRequired
503 ? Arg
504 : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
505 ++I;
// Parameters that follow the context parameter are taken over unchanged.
507 Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
508 CD->param_end());
509 TargetArgs.append(
510 std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
511 CD->param_end());
513 // Create the function declaration.
514 const CGFunctionInfo &FuncInfo =
515 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
516 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
518 auto *F =
519 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
520 FO.FunctionName, &CGM.getModule());
521 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
522 if (CD->isNothrow())
523 F->setDoesNotThrow();
524 F->setDoesNotRecurse();
526 // Always inline the outlined function if optimizations are enabled.
527 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
528 F->removeFnAttr(llvm::Attribute::NoInline);
529 F->addFnAttr(llvm::Attribute::AlwaysInline);
532 // Generate the function.
// With uintptr casting both locations are FO.Loc; otherwise the statement's
// begin location and the captured body's begin location are used.
533 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
534 FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
535 FO.UIntPtrCastRequired ? FO.Loc
536 : CD->getBody()->getBeginLoc());
// Second pass over the captured fields: Cnt indexes Args/TargetArgs starting
// at the position the context parameter had, i.e. at the first per-field
// parameter, and I restarts at the first capture.
537 unsigned Cnt = CD->getContextParamPosition();
538 I = FO.S->captures().begin();
539 for (const FieldDecl *FD : RD->fields()) {
540 // Do not map arguments if we emit function with non-original types.
541 Address LocalAddr(Address::invalid());
// A parameter that the runtime translated (Args differs from TargetArgs) has
// its address supplied by the runtime; otherwise the local variable address
// of the parameter is used.
542 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
543 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
544 TargetArgs[Cnt]);
545 } else {
546 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
548 // If we are capturing a pointer by copy we don't need to do anything, just
549 // use the value that we get from the arguments.
550 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
551 const VarDecl *CurVD = I->getCapturedVar();
552 if (!FO.RegisterCastedArgsOnly)
553 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
554 ++Cnt;
555 ++I;
556 continue;
559 LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
560 AlignmentSource::Decl);
561 if (FD->hasCapturedVLAType()) {
// Captured VLA size: reinterpret the argument as the field's type when it
// was passed as uintptr, load it, and record it with its size expression.
562 if (FO.UIntPtrCastRequired) {
563 ArgLVal = CGF.MakeAddrLValue(
564 castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
565 Args[Cnt]->getName(), ArgLVal),
566 FD->getType(), AlignmentSource::Decl);
568 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
569 const VariableArrayType *VAT = FD->getCapturedVLAType();
570 VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
571 } else if (I->capturesVariable()) {
// By-reference capture: the argument holds a reference or pointer to the
// variable; load through it to obtain the variable's address.
572 const VarDecl *Var = I->getCapturedVar();
573 QualType VarTy = Var->getType();
574 Address ArgAddr = ArgLVal.getAddress(CGF);
575 if (ArgLVal.getType()->isLValueReferenceType()) {
576 ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
577 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
578 assert(ArgLVal.getType()->isPointerType());
579 ArgAddr = CGF.EmitLoadOfPointer(
580 ArgAddr, ArgLVal.getType()->castAs<PointerType>());
// The recorded address carries the variable's declared alignment.
582 if (!FO.RegisterCastedArgsOnly) {
583 LocalAddrs.insert(
584 {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
586 } else if (I->capturesVariableByCopy()) {
// By-copy capture of a non-pointer: registered regardless of
// RegisterCastedArgsOnly, using the uintptr-reinterpreted address when
// casting is required and the argument's own address otherwise.
587 assert(!FD->getType()->isAnyPointerType() &&
588 "Not expecting a captured pointer.");
589 const VarDecl *Var = I->getCapturedVar();
590 LocalAddrs.insert({Args[Cnt],
591 {Var, FO.UIntPtrCastRequired
592 ? castValueFromUintptr(
593 CGF, I->getLocation(), FD->getType(),
594 Args[Cnt]->getName(), ArgLVal)
595 : ArgLVal.getAddress(CGF)}});
596 } else {
597 // If 'this' is captured, load it into CXXThisValue.
598 assert(I->capturesThis());
599 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
600 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
602 ++Cnt;
603 ++I;
606 return F;
/// Emits the outlined function for the captured statement \p S and returns
/// it. When debug info is being generated at reduced level or above, the body
/// is emitted into a function named "<helper name>_debug__" that is built
/// without uintptr casts, and a second function carrying the plain helper
/// name is emitted that loads its arguments and calls the first one; that
/// wrapper is what gets returned in this case.
609 llvm::Function *
610 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
611 SourceLocation Loc) {
612 assert(
613 CapturedStmtInfo &&
614 "CapturedStmtInfo should be set when generating the captured function");
615 const CapturedDecl *CD = S.getCapturedDecl();
616 // Build the argument list.
617 bool NeedWrapperFunction =
618 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
619 FunctionArgList Args;
620 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
621 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
// Function name: the helper name, plus a "_debug__" suffix when a wrapper
// will be generated. Out.str() refers to Buffer, which is still alive when
// FO is used below.
622 SmallString<256> Buffer;
623 llvm::raw_svector_ostream Out(Buffer);
624 Out << CapturedStmtInfo->getHelperName();
625 if (NeedWrapperFunction)
626 Out << "_debug__";
627 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
628 Out.str(), Loc);
629 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
630 VLASizes, CXXThisValue, FO);
// Make every captured variable resolve to the address computed by the
// prologue; entries without a variable (the 'this' capture) are skipped.
631 CodeGenFunction::OMPPrivateScope LocalScope(*this);
632 for (const auto &LocalAddrPair : LocalAddrs) {
633 if (LocalAddrPair.second.first) {
634 LocalScope.addPrivate(LocalAddrPair.second.first,
635 LocalAddrPair.second.second);
638 (void)LocalScope.Privatize();
// Publish the loaded VLA sizes under their size expressions.
639 for (const auto &VLASizePair : VLASizes)
640 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
641 PGO.assignRegionCounters(GlobalDecl(CD), F);
642 CapturedStmtInfo->EmitBody(*this, CD->getBody());
643 (void)LocalScope.ForceCleanup();
644 FinishFunction(CD->getBodyRBrace());
645 if (!NeedWrapperFunction)
646 return F;
// Wrapper path: emit a second prologue under the plain helper name, with
// uintptr casts and with only casted arguments registered, on a separate
// CodeGenFunction that shares this function's CapturedStmtInfo.
648 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
649 /*RegisterCastedArgsOnly=*/true,
650 CapturedStmtInfo->getHelperName(), Loc);
651 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
652 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
653 Args.clear();
654 LocalAddrs.clear();
655 VLASizes.clear();
656 llvm::Function *WrapperF =
657 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
658 WrapperCGF.CXXThisValue, WrapperFO);
// Build the call arguments; PI walks the parameters of F in step with Args.
659 llvm::SmallVector<llvm::Value *, 4> CallArgs;
660 auto *PI = F->arg_begin();
661 for (const auto *Arg : Args) {
662 llvm::Value *CallArg;
663 auto I = LocalAddrs.find(Arg);
664 if (I != LocalAddrs.end()) {
// Registered argument: load it from the recorded address, typed as the
// captured variable when there is one and as the argument otherwise.
665 LValue LV = WrapperCGF.MakeAddrLValue(
666 I->second.second,
667 I->second.first ? I->second.first->getType() : Arg->getType(),
668 AlignmentSource::Decl);
// For complex types the address is first cast to point at the type of the
// corresponding parameter of F.
669 if (LV.getType()->isAnyComplexType())
670 LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
671 LV.getAddress(WrapperCGF),
672 PI->getType()->getPointerTo(
673 LV.getAddress(WrapperCGF).getAddressSpace()),
674 PI->getType()));
675 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
676 } else {
// Otherwise use the recorded VLA size if there is one, else load the
// argument from its own local slot.
677 auto EI = VLASizes.find(Arg);
678 if (EI != VLASizes.end()) {
679 CallArg = EI->second.second;
680 } else {
681 LValue LV =
682 WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
683 Arg->getType(), AlignmentSource::Decl);
684 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
687 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
688 ++PI;
690 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
691 WrapperCGF.FinishFunction();
692 return WrapperF;
695 //===----------------------------------------------------------------------===//
696 // OpenMP Directive Emission
697 //===----------------------------------------------------------------------===//
/// Emits a loop that walks the elements of the destination and source arrays
/// in lockstep and invokes \p CopyGen(destination element, source element)
/// for each pair. The loop body is skipped entirely when the destination has
/// no elements.
///
/// \param DestAddr      Address of the destination array.
/// \param SrcAddr       Address of the source array.
/// \param OriginalType  Type of the arrays; it is accessed as an array type
///                      without a check.
/// \param CopyGen       Callback that emits the copy of a single element.
698 void CodeGenFunction::EmitOMPAggregateAssign(
699 Address DestAddr, Address SrcAddr, QualType OriginalType,
700 const llvm::function_ref<void(Address, Address)> CopyGen) {
701 // Perform element-by-element initialization.
702 QualType ElementTy;
704 // Drill down to the base element type on both arrays.
705 const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
// emitArrayLength takes ElementTy and DestAddr by name here and both are
// used below as the element type/address; presumably it updates them to the
// base element -- confirm against its declaration.
706 llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
// View the source through the same element type as the destination.
707 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
709 llvm::Value *SrcBegin = SrcAddr.getPointer();
710 llvm::Value *DestBegin = DestAddr.getPointer();
711 // Cast from pointer to array type to pointer to single element.
712 llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
713 DestBegin, NumElements);
715 // The basic structure here is a while-do loop.
716 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
717 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
// Empty destination (begin == end): branch straight to the done block.
718 llvm::Value *IsEmpty =
719 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
720 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
722 // Enter the loop body, making that address the current address.
// EntryBB is the block holding the branch above; it is the first incoming
// edge of both PHI nodes.
723 llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
724 EmitBlock(BodyBB);
726 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
// Each PHI has two incoming values: the begin pointer from EntryBB and the
// advanced pointer from the back edge, which is added after the body has
// been emitted.
728 llvm::PHINode *SrcElementPHI =
729 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
730 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
731 Address SrcElementCurrent =
732 Address(SrcElementPHI, SrcAddr.getElementType(),
733 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
735 llvm::PHINode *DestElementPHI = Builder.CreatePHI(
736 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
737 DestElementPHI->addIncoming(DestBegin, EntryBB);
738 Address DestElementCurrent =
739 Address(DestElementPHI, DestAddr.getElementType(),
740 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
742 // Emit copy.
743 CopyGen(DestElementCurrent, SrcElementCurrent);
745 // Shift the address forward by one element.
746 llvm::Value *DestElementNext =
747 Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
748 /*Idx0=*/1, "omp.arraycpy.dest.element");
749 llvm::Value *SrcElementNext =
750 Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
751 /*Idx0=*/1, "omp.arraycpy.src.element");
752 // Check whether we've reached the end.
753 llvm::Value *Done =
754 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
755 Builder.CreateCondBr(Done, DoneBB, BodyBB);
// The back edge comes from whatever block the builder is in now, which is
// not necessarily BodyBB if CopyGen emitted additional blocks.
756 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
757 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
759 // Done.
760 EmitBlock(DoneBB, /*IsFinished=*/true);
763 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
764 Address SrcAddr, const VarDecl *DestVD,
765 const VarDecl *SrcVD, const Expr *Copy) {
766 if (OriginalType->isArrayType()) {
767 const auto *BO = dyn_cast<BinaryOperator>(Copy);
768 if (BO && BO->getOpcode() == BO_Assign) {
769 // Perform simple memcpy for simple copying.
770 LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
771 LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
772 EmitAggregateAssign(Dest, Src, OriginalType);
773 } else {
774 // For arrays with complex element types perform element by element
775 // copying.
776 EmitOMPAggregateAssign(
777 DestAddr, SrcAddr, OriginalType,
778 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
779 // Working with the single array element, so have to remap
780 // destination and source variables to corresponding array
781 // elements.
782 CodeGenFunction::OMPPrivateScope Remap(*this);
783 Remap.addPrivate(DestVD, DestElement);
784 Remap.addPrivate(SrcVD, SrcElement);
785 (void)Remap.Privatize();
786 EmitIgnoredExpr(Copy);
789 } else {
790 // Remap pseudo source variable to private copy.
791 CodeGenFunction::OMPPrivateScope Remap(*this);
792 Remap.addPrivate(SrcVD, SrcAddr);
793 Remap.addPrivate(DestVD, DestAddr);
794 (void)Remap.Privatize();
795 // Emit copying of the whole variable.
796 EmitIgnoredExpr(Copy);
800 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
801 OMPPrivateScope &PrivateScope) {
802 if (!HaveInsertPoint())
803 return false;
804 bool DeviceConstTarget =
805 getLangOpts().OpenMPIsDevice &&
806 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
807 bool FirstprivateIsLastprivate = false;
808 llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
809 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
810 for (const auto *D : C->varlists())
811 Lastprivates.try_emplace(
812 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
813 C->getKind());
815 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
816 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
817 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
818 // Force emission of the firstprivate copy if the directive does not emit
819 // outlined function, like omp for, omp simd, omp distribute etc.
820 bool MustEmitFirstprivateCopy =
821 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
822 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
823 const auto *IRef = C->varlist_begin();
824 const auto *InitsRef = C->inits().begin();
825 for (const Expr *IInit : C->private_copies()) {
826 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
827 bool ThisFirstprivateIsLastprivate =
828 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
829 const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
830 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
831 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
832 !FD->getType()->isReferenceType() &&
833 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
834 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
835 ++IRef;
836 ++InitsRef;
837 continue;
839 // Do not emit copy for firstprivate constant variables in target regions,
840 // captured by reference.
841 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
842 FD && FD->getType()->isReferenceType() &&
843 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
844 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
845 ++IRef;
846 ++InitsRef;
847 continue;
849 FirstprivateIsLastprivate =
850 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
851 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
852 const auto *VDInit =
853 cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
854 bool IsRegistered;
855 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
856 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
857 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
858 LValue OriginalLVal;
859 if (!FD) {
860 // Check if the firstprivate variable is just a constant value.
861 ConstantEmission CE = tryEmitAsConstant(&DRE);
862 if (CE && !CE.isReference()) {
863 // Constant value, no need to create a copy.
864 ++IRef;
865 ++InitsRef;
866 continue;
868 if (CE && CE.isReference()) {
869 OriginalLVal = CE.getReferenceLValue(*this, &DRE);
870 } else {
871 assert(!CE && "Expected non-constant firstprivate.");
872 OriginalLVal = EmitLValue(&DRE);
874 } else {
875 OriginalLVal = EmitLValue(&DRE);
877 QualType Type = VD->getType();
878 if (Type->isArrayType()) {
879 // Emit VarDecl with copy init for arrays.
880 // Get the address of the original variable captured in current
881 // captured region.
882 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
883 const Expr *Init = VD->getInit();
884 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
885 // Perform simple memcpy.
886 LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
887 EmitAggregateAssign(Dest, OriginalLVal, Type);
888 } else {
889 EmitOMPAggregateAssign(
890 Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
891 Type,
892 [this, VDInit, Init](Address DestElement, Address SrcElement) {
893 // Clean up any temporaries needed by the
894 // initialization.
895 RunCleanupsScope InitScope(*this);
896 // Emit initialization for single element.
897 setAddrOfLocalVar(VDInit, SrcElement);
898 EmitAnyExprToMem(Init, DestElement,
899 Init->getType().getQualifiers(),
900 /*IsInitializer*/ false);
901 LocalDeclMap.erase(VDInit);
904 EmitAutoVarCleanups(Emission);
905 IsRegistered =
906 PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
907 } else {
908 Address OriginalAddr = OriginalLVal.getAddress(*this);
909 // Emit private VarDecl with copy init.
910 // Remap temp VDInit variable to the address of the original
911 // variable (for proper handling of captured global variables).
912 setAddrOfLocalVar(VDInit, OriginalAddr);
913 EmitDecl(*VD);
914 LocalDeclMap.erase(VDInit);
915 Address VDAddr = GetAddrOfLocalVar(VD);
916 if (ThisFirstprivateIsLastprivate &&
917 Lastprivates[OrigVD->getCanonicalDecl()] ==
918 OMPC_LASTPRIVATE_conditional) {
919 // Create/init special variable for lastprivate conditionals.
920 llvm::Value *V =
921 EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
922 AlignmentSource::Decl),
923 (*IRef)->getExprLoc());
924 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
925 *this, OrigVD);
926 EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
927 AlignmentSource::Decl));
928 LocalDeclMap.erase(VD);
929 setAddrOfLocalVar(VD, VDAddr);
931 IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
933 assert(IsRegistered &&
934 "firstprivate var already registered as private");
935 // Silence the warning about unused variable.
936 (void)IsRegistered;
938 ++IRef;
939 ++InitsRef;
942 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
945 void CodeGenFunction::EmitOMPPrivateClause(
946 const OMPExecutableDirective &D,
947 CodeGenFunction::OMPPrivateScope &PrivateScope) {
948 if (!HaveInsertPoint())
949 return;
950 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
951 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
952 auto IRef = C->varlist_begin();
953 for (const Expr *IInit : C->private_copies()) {
954 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
955 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
956 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
957 EmitDecl(*VD);
958 // Emit private VarDecl with copy init.
959 bool IsRegistered =
960 PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
961 assert(IsRegistered && "private var already registered as private");
962 // Silence the warning about unused variable.
963 (void)IsRegistered;
965 ++IRef;
970 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
971 if (!HaveInsertPoint())
972 return false;
973 // threadprivate_var1 = master_threadprivate_var1;
974 // operator=(threadprivate_var2, master_threadprivate_var2);
975 // ...
976 // __kmpc_barrier(&loc, global_tid);
977 llvm::DenseSet<const VarDecl *> CopiedVars;
978 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
979 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
980 auto IRef = C->varlist_begin();
981 auto ISrcRef = C->source_exprs().begin();
982 auto IDestRef = C->destination_exprs().begin();
983 for (const Expr *AssignOp : C->assignment_ops()) {
984 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
985 QualType Type = VD->getType();
986 if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
987 // Get the address of the master variable. If we are emitting code with
988 // TLS support, the address is passed from the master as field in the
989 // captured declaration.
990 Address MasterAddr = Address::invalid();
991 if (getLangOpts().OpenMPUseTLS &&
992 getContext().getTargetInfo().isTLSSupported()) {
993 assert(CapturedStmtInfo->lookup(VD) &&
994 "Copyin threadprivates should have been captured!");
995 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
996 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
997 MasterAddr = EmitLValue(&DRE).getAddress(*this);
998 LocalDeclMap.erase(VD);
999 } else {
1000 MasterAddr =
1001 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
1002 : CGM.GetAddrOfGlobal(VD),
1003 CGM.getTypes().ConvertTypeForMem(VD->getType()),
1004 getContext().getDeclAlign(VD));
1006 // Get the address of the threadprivate variable.
1007 Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
1008 if (CopiedVars.size() == 1) {
1009 // At first check if current thread is a master thread. If it is, no
1010 // need to copy data.
1011 CopyBegin = createBasicBlock("copyin.not.master");
1012 CopyEnd = createBasicBlock("copyin.not.master.end");
1013 // TODO: Avoid ptrtoint conversion.
1014 auto *MasterAddrInt =
1015 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
1016 auto *PrivateAddrInt =
1017 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
1018 Builder.CreateCondBr(
1019 Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
1020 CopyEnd);
1021 EmitBlock(CopyBegin);
1023 const auto *SrcVD =
1024 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1025 const auto *DestVD =
1026 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1027 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1029 ++IRef;
1030 ++ISrcRef;
1031 ++IDestRef;
1034 if (CopyEnd) {
1035 // Exit out of copying procedure for non-master thread.
1036 EmitBlock(CopyEnd, /*IsFinished=*/true);
1037 return true;
1039 return false;
1042 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1043 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1044 if (!HaveInsertPoint())
1045 return false;
1046 bool HasAtLeastOneLastprivate = false;
1047 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1048 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1049 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1050 for (const Expr *C : LoopDirective->counters()) {
1051 SIMDLCVs.insert(
1052 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1055 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1056 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1057 HasAtLeastOneLastprivate = true;
1058 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1059 !getLangOpts().OpenMPSimd)
1060 break;
1061 const auto *IRef = C->varlist_begin();
1062 const auto *IDestRef = C->destination_exprs().begin();
1063 for (const Expr *IInit : C->private_copies()) {
1064 // Keep the address of the original variable for future update at the end
1065 // of the loop.
1066 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1067 // Taskloops do not require additional initialization, it is done in
1068 // runtime support library.
1069 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1070 const auto *DestVD =
1071 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1072 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1073 /*RefersToEnclosingVariableOrCapture=*/
1074 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1075 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1076 PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
1077 // Check if the variable is also a firstprivate: in this case IInit is
1078 // not generated. Initialization of this variable will happen in codegen
1079 // for 'firstprivate' clause.
1080 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1081 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1082 Address VDAddr = Address::invalid();
1083 if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1084 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1085 *this, OrigVD);
1086 setAddrOfLocalVar(VD, VDAddr);
1087 } else {
1088 // Emit private VarDecl with copy init.
1089 EmitDecl(*VD);
1090 VDAddr = GetAddrOfLocalVar(VD);
1092 bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
1093 assert(IsRegistered &&
1094 "lastprivate var already registered as private");
1095 (void)IsRegistered;
1098 ++IRef;
1099 ++IDestRef;
1102 return HasAtLeastOneLastprivate;
1105 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1106 const OMPExecutableDirective &D, bool NoFinals,
1107 llvm::Value *IsLastIterCond) {
1108 if (!HaveInsertPoint())
1109 return;
1110 // Emit following code:
1111 // if (<IsLastIterCond>) {
1112 // orig_var1 = private_orig_var1;
1113 // ...
1114 // orig_varn = private_orig_varn;
1115 // }
1116 llvm::BasicBlock *ThenBB = nullptr;
1117 llvm::BasicBlock *DoneBB = nullptr;
1118 if (IsLastIterCond) {
1119 // Emit implicit barrier if at least one lastprivate conditional is found
1120 // and this is not a simd mode.
1121 if (!getLangOpts().OpenMPSimd &&
1122 llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1123 [](const OMPLastprivateClause *C) {
1124 return C->getKind() == OMPC_LASTPRIVATE_conditional;
1125 })) {
1126 CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1127 OMPD_unknown,
1128 /*EmitChecks=*/false,
1129 /*ForceSimpleCall=*/true);
1131 ThenBB = createBasicBlock(".omp.lastprivate.then");
1132 DoneBB = createBasicBlock(".omp.lastprivate.done");
1133 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1134 EmitBlock(ThenBB);
1136 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1137 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1138 if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1139 auto IC = LoopDirective->counters().begin();
1140 for (const Expr *F : LoopDirective->finals()) {
1141 const auto *D =
1142 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1143 if (NoFinals)
1144 AlreadyEmittedVars.insert(D);
1145 else
1146 LoopCountersAndUpdates[D] = F;
1147 ++IC;
1150 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1151 auto IRef = C->varlist_begin();
1152 auto ISrcRef = C->source_exprs().begin();
1153 auto IDestRef = C->destination_exprs().begin();
1154 for (const Expr *AssignOp : C->assignment_ops()) {
1155 const auto *PrivateVD =
1156 cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1157 QualType Type = PrivateVD->getType();
1158 const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1159 if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1160 // If lastprivate variable is a loop control variable for loop-based
1161 // directive, update its value before copyin back to original
1162 // variable.
1163 if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1164 EmitIgnoredExpr(FinalExpr);
1165 const auto *SrcVD =
1166 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1167 const auto *DestVD =
1168 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1169 // Get the address of the private variable.
1170 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1171 if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1172 PrivateAddr = Address(
1173 Builder.CreateLoad(PrivateAddr),
1174 CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
1175 CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1176 // Store the last value to the private copy in the last iteration.
1177 if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1178 CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1179 *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1180 (*IRef)->getExprLoc());
1181 // Get the address of the original variable.
1182 Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1183 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1185 ++IRef;
1186 ++ISrcRef;
1187 ++IDestRef;
1189 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1190 EmitIgnoredExpr(PostUpdate);
1192 if (IsLastIterCond)
1193 EmitBlock(DoneBB, /*IsFinished=*/true);
1196 void CodeGenFunction::EmitOMPReductionClauseInit(
1197 const OMPExecutableDirective &D,
1198 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1199 if (!HaveInsertPoint())
1200 return;
1201 SmallVector<const Expr *, 4> Shareds;
1202 SmallVector<const Expr *, 4> Privates;
1203 SmallVector<const Expr *, 4> ReductionOps;
1204 SmallVector<const Expr *, 4> LHSs;
1205 SmallVector<const Expr *, 4> RHSs;
1206 OMPTaskDataTy Data;
1207 SmallVector<const Expr *, 4> TaskLHSs;
1208 SmallVector<const Expr *, 4> TaskRHSs;
1209 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1210 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1211 continue;
1212 Shareds.append(C->varlist_begin(), C->varlist_end());
1213 Privates.append(C->privates().begin(), C->privates().end());
1214 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1215 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1216 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1217 if (C->getModifier() == OMPC_REDUCTION_task) {
1218 Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1219 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1220 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1221 Data.ReductionOps.append(C->reduction_ops().begin(),
1222 C->reduction_ops().end());
1223 TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1224 TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1227 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1228 unsigned Count = 0;
1229 auto *ILHS = LHSs.begin();
1230 auto *IRHS = RHSs.begin();
1231 auto *IPriv = Privates.begin();
1232 for (const Expr *IRef : Shareds) {
1233 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1234 // Emit private VarDecl with reduction init.
1235 RedCG.emitSharedOrigLValue(*this, Count);
1236 RedCG.emitAggregateType(*this, Count);
1237 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1238 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1239 RedCG.getSharedLValue(Count).getAddress(*this),
1240 [&Emission](CodeGenFunction &CGF) {
1241 CGF.EmitAutoVarInit(Emission);
1242 return true;
1244 EmitAutoVarCleanups(Emission);
1245 Address BaseAddr = RedCG.adjustPrivateAddress(
1246 *this, Count, Emission.getAllocatedAddress());
1247 bool IsRegistered =
1248 PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
1249 assert(IsRegistered && "private var already registered as private");
1250 // Silence the warning about unused variable.
1251 (void)IsRegistered;
1253 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1254 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1255 QualType Type = PrivateVD->getType();
1256 bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1257 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1258 // Store the address of the original variable associated with the LHS
1259 // implicit variable.
1260 PrivateScope.addPrivate(LHSVD,
1261 RedCG.getSharedLValue(Count).getAddress(*this));
1262 PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
1263 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1264 isa<ArraySubscriptExpr>(IRef)) {
1265 // Store the address of the original variable associated with the LHS
1266 // implicit variable.
1267 PrivateScope.addPrivate(LHSVD,
1268 RedCG.getSharedLValue(Count).getAddress(*this));
1269 PrivateScope.addPrivate(RHSVD, Builder.CreateElementBitCast(
1270 GetAddrOfLocalVar(PrivateVD),
1271 ConvertTypeForMem(RHSVD->getType()),
1272 "rhs.begin"));
1273 } else {
1274 QualType Type = PrivateVD->getType();
1275 bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1276 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1277 // Store the address of the original variable associated with the LHS
1278 // implicit variable.
1279 if (IsArray) {
1280 OriginalAddr = Builder.CreateElementBitCast(
1281 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1283 PrivateScope.addPrivate(LHSVD, OriginalAddr);
1284 PrivateScope.addPrivate(
1285 RHSVD, IsArray ? Builder.CreateElementBitCast(
1286 GetAddrOfLocalVar(PrivateVD),
1287 ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1288 : GetAddrOfLocalVar(PrivateVD));
1290 ++ILHS;
1291 ++IRHS;
1292 ++IPriv;
1293 ++Count;
1295 if (!Data.ReductionVars.empty()) {
1296 Data.IsReductionWithTaskMod = true;
1297 Data.IsWorksharingReduction =
1298 isOpenMPWorksharingDirective(D.getDirectiveKind());
1299 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1300 *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1301 const Expr *TaskRedRef = nullptr;
1302 switch (D.getDirectiveKind()) {
1303 case OMPD_parallel:
1304 TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1305 break;
1306 case OMPD_for:
1307 TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1308 break;
1309 case OMPD_sections:
1310 TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1311 break;
1312 case OMPD_parallel_for:
1313 TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1314 break;
1315 case OMPD_parallel_master:
1316 TaskRedRef =
1317 cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1318 break;
1319 case OMPD_parallel_sections:
1320 TaskRedRef =
1321 cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1322 break;
1323 case OMPD_target_parallel:
1324 TaskRedRef =
1325 cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1326 break;
1327 case OMPD_target_parallel_for:
1328 TaskRedRef =
1329 cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1330 break;
1331 case OMPD_distribute_parallel_for:
1332 TaskRedRef =
1333 cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1334 break;
1335 case OMPD_teams_distribute_parallel_for:
1336 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1337 .getTaskReductionRefExpr();
1338 break;
1339 case OMPD_target_teams_distribute_parallel_for:
1340 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1341 .getTaskReductionRefExpr();
1342 break;
1343 case OMPD_simd:
1344 case OMPD_for_simd:
1345 case OMPD_section:
1346 case OMPD_single:
1347 case OMPD_master:
1348 case OMPD_critical:
1349 case OMPD_parallel_for_simd:
1350 case OMPD_task:
1351 case OMPD_taskyield:
1352 case OMPD_error:
1353 case OMPD_barrier:
1354 case OMPD_taskwait:
1355 case OMPD_taskgroup:
1356 case OMPD_flush:
1357 case OMPD_depobj:
1358 case OMPD_scan:
1359 case OMPD_ordered:
1360 case OMPD_atomic:
1361 case OMPD_teams:
1362 case OMPD_target:
1363 case OMPD_cancellation_point:
1364 case OMPD_cancel:
1365 case OMPD_target_data:
1366 case OMPD_target_enter_data:
1367 case OMPD_target_exit_data:
1368 case OMPD_taskloop:
1369 case OMPD_taskloop_simd:
1370 case OMPD_master_taskloop:
1371 case OMPD_master_taskloop_simd:
1372 case OMPD_parallel_master_taskloop:
1373 case OMPD_parallel_master_taskloop_simd:
1374 case OMPD_distribute:
1375 case OMPD_target_update:
1376 case OMPD_distribute_parallel_for_simd:
1377 case OMPD_distribute_simd:
1378 case OMPD_target_parallel_for_simd:
1379 case OMPD_target_simd:
1380 case OMPD_teams_distribute:
1381 case OMPD_teams_distribute_simd:
1382 case OMPD_teams_distribute_parallel_for_simd:
1383 case OMPD_target_teams:
1384 case OMPD_target_teams_distribute:
1385 case OMPD_target_teams_distribute_parallel_for_simd:
1386 case OMPD_target_teams_distribute_simd:
1387 case OMPD_declare_target:
1388 case OMPD_end_declare_target:
1389 case OMPD_threadprivate:
1390 case OMPD_allocate:
1391 case OMPD_declare_reduction:
1392 case OMPD_declare_mapper:
1393 case OMPD_declare_simd:
1394 case OMPD_requires:
1395 case OMPD_declare_variant:
1396 case OMPD_begin_declare_variant:
1397 case OMPD_end_declare_variant:
1398 case OMPD_unknown:
1399 default:
1400 llvm_unreachable("Enexpected directive with task reductions.");
1403 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1404 EmitVarDecl(*VD);
1405 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1406 /*Volatile=*/false, TaskRedRef->getType());
1410 void CodeGenFunction::EmitOMPReductionClauseFinal(
1411 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1412 if (!HaveInsertPoint())
1413 return;
1414 llvm::SmallVector<const Expr *, 8> Privates;
1415 llvm::SmallVector<const Expr *, 8> LHSExprs;
1416 llvm::SmallVector<const Expr *, 8> RHSExprs;
1417 llvm::SmallVector<const Expr *, 8> ReductionOps;
1418 bool HasAtLeastOneReduction = false;
1419 bool IsReductionWithTaskMod = false;
1420 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1421 // Do not emit for inscan reductions.
1422 if (C->getModifier() == OMPC_REDUCTION_inscan)
1423 continue;
1424 HasAtLeastOneReduction = true;
1425 Privates.append(C->privates().begin(), C->privates().end());
1426 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1427 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1428 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1429 IsReductionWithTaskMod =
1430 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1432 if (HasAtLeastOneReduction) {
1433 if (IsReductionWithTaskMod) {
1434 CGM.getOpenMPRuntime().emitTaskReductionFini(
1435 *this, D.getBeginLoc(),
1436 isOpenMPWorksharingDirective(D.getDirectiveKind()));
1438 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1439 isOpenMPParallelDirective(D.getDirectiveKind()) ||
1440 ReductionKind == OMPD_simd;
1441 bool SimpleReduction = ReductionKind == OMPD_simd;
1442 // Emit nowait reduction if nowait clause is present or directive is a
1443 // parallel directive (it always has implicit barrier).
1444 CGM.getOpenMPRuntime().emitReduction(
1445 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1446 {WithNowait, SimpleReduction, ReductionKind});
1450 static void emitPostUpdateForReductionClause(
1451 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1452 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1453 if (!CGF.HaveInsertPoint())
1454 return;
1455 llvm::BasicBlock *DoneBB = nullptr;
1456 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1457 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1458 if (!DoneBB) {
1459 if (llvm::Value *Cond = CondGen(CGF)) {
1460 // If the first post-update expression is found, emit conditional
1461 // block if it was requested.
1462 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1463 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1464 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1465 CGF.EmitBlock(ThenBB);
1468 CGF.EmitIgnoredExpr(PostUpdate);
1471 if (DoneBB)
1472 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1475 namespace {
1476 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1477 /// parallel function. This is necessary for combined constructs such as
1478 /// 'distribute parallel for'
1479 typedef llvm::function_ref<void(CodeGenFunction &,
1480 const OMPExecutableDirective &,
1481 llvm::SmallVectorImpl<llvm::Value *> &)>
1482 CodeGenBoundParametersTy;
1483 } // anonymous namespace
1485 static void
1486 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1487 const OMPExecutableDirective &S) {
1488 if (CGF.getLangOpts().OpenMP < 50)
1489 return;
1490 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1491 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1492 for (const Expr *Ref : C->varlists()) {
1493 if (!Ref->getType()->isScalarType())
1494 continue;
1495 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1496 if (!DRE)
1497 continue;
1498 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1499 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1502 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1503 for (const Expr *Ref : C->varlists()) {
1504 if (!Ref->getType()->isScalarType())
1505 continue;
1506 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1507 if (!DRE)
1508 continue;
1509 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1510 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1513 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1514 for (const Expr *Ref : C->varlists()) {
1515 if (!Ref->getType()->isScalarType())
1516 continue;
1517 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1518 if (!DRE)
1519 continue;
1520 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1521 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1524 // Privates should ne analyzed since they are not captured at all.
1525 // Task reductions may be skipped - tasks are ignored.
1526 // Firstprivates do not return value but may be passed by reference - no need
1527 // to check for updated lastprivate conditional.
1528 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1529 for (const Expr *Ref : C->varlists()) {
1530 if (!Ref->getType()->isScalarType())
1531 continue;
1532 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1533 if (!DRE)
1534 continue;
1535 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1538 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1539 CGF, S, PrivateDecls);
1542 static void emitCommonOMPParallelDirective(
1543 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1544 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1545 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1546 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1547 llvm::Value *NumThreads = nullptr;
1548 llvm::Function *OutlinedFn =
1549 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1550 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1551 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1552 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1553 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1554 /*IgnoreResultAssign=*/true);
1555 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1556 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1558 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1559 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1560 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1561 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1563 const Expr *IfCond = nullptr;
1564 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1565 if (C->getNameModifier() == OMPD_unknown ||
1566 C->getNameModifier() == OMPD_parallel) {
1567 IfCond = C->getCondition();
1568 break;
1572 OMPParallelScope Scope(CGF, S);
1573 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1574 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1575 // lower and upper bounds with the pragma 'for' chunking mechanism.
1576 // The following lambda takes care of appending the lower and upper bound
1577 // parameters when necessary
1578 CodeGenBoundParameters(CGF, S, CapturedVars);
1579 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1580 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1581 CapturedVars, IfCond, NumThreads);
1584 static bool isAllocatableDecl(const VarDecl *VD) {
1585 const VarDecl *CVD = VD->getCanonicalDecl();
1586 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1587 return false;
1588 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1589 // Use the default allocation.
1590 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1591 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1592 !AA->getAllocator());
1595 static void emitEmptyBoundParameters(CodeGenFunction &,
1596 const OMPExecutableDirective &,
1597 llvm::SmallVectorImpl<llvm::Value *> &) {}
1599 static void emitOMPCopyinClause(CodeGenFunction &CGF,
1600 const OMPExecutableDirective &S) {
1601 bool Copyins = CGF.EmitOMPCopyinClause(S);
1602 if (Copyins) {
1603 // Emit implicit barrier to synchronize threads and avoid data races on
1604 // propagation master's thread values of threadprivate variables to local
1605 // instances of that variables of all other implicit threads.
1606 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1607 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1608 /*ForceSimpleCall=*/true);
1612 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1613 CodeGenFunction &CGF, const VarDecl *VD) {
1614 CodeGenModule &CGM = CGF.CGM;
1615 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1617 if (!VD)
1618 return Address::invalid();
1619 const VarDecl *CVD = VD->getCanonicalDecl();
1620 if (!isAllocatableDecl(CVD))
1621 return Address::invalid();
1622 llvm::Value *Size;
1623 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1624 if (CVD->getType()->isVariablyModifiedType()) {
1625 Size = CGF.getTypeSize(CVD->getType());
1626 // Align the size: ((size + align - 1) / align) * align
1627 Size = CGF.Builder.CreateNUWAdd(
1628 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1629 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1630 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1631 } else {
1632 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1633 Size = CGM.getSize(Sz.alignTo(Align));
1636 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1637 assert(AA->getAllocator() &&
1638 "Expected allocator expression for non-default allocator.");
1639 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1640 // According to the standard, the original allocator type is a enum (integer).
1641 // Convert to pointer type, if required.
1642 if (Allocator->getType()->isIntegerTy())
1643 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1644 else if (Allocator->getType()->isPointerTy())
1645 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1646 CGM.VoidPtrTy);
1648 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1649 CGF.Builder, Size, Allocator,
1650 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1651 llvm::CallInst *FreeCI =
1652 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1654 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1655 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1656 Addr,
1657 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1658 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1659 return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
1662 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1663 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1664 SourceLocation Loc) {
1665 CodeGenModule &CGM = CGF.CGM;
1666 if (CGM.getLangOpts().OpenMPUseTLS &&
1667 CGM.getContext().getTargetInfo().isTLSSupported())
1668 return VDAddr;
1670 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1672 llvm::Type *VarTy = VDAddr.getElementType();
1673 llvm::Value *Data =
1674 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1675 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1676 std::string Suffix = getNameWithSeparators({"cache", ""});
1677 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1679 llvm::CallInst *ThreadPrivateCacheCall =
1680 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1682 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1685 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1686 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1687 SmallString<128> Buffer;
1688 llvm::raw_svector_ostream OS(Buffer);
1689 StringRef Sep = FirstSeparator;
1690 for (StringRef Part : Parts) {
1691 OS << Sep << Part;
1692 Sep = Separator;
1694 return OS.str().str();
1697 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1698 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1699 InsertPointTy CodeGenIP, Twine RegionName) {
1700 CGBuilderTy &Builder = CGF.Builder;
1701 Builder.restoreIP(CodeGenIP);
1702 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1703 "." + RegionName + ".after");
1706 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1707 CGF.EmitStmt(RegionBodyStmt);
1710 if (Builder.saveIP().isSet())
1711 Builder.CreateBr(FiniBB);
1714 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1715 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1716 InsertPointTy CodeGenIP, Twine RegionName) {
1717 CGBuilderTy &Builder = CGF.Builder;
1718 Builder.restoreIP(CodeGenIP);
1719 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1720 "." + RegionName + ".after");
1723 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1724 CGF.EmitStmt(RegionBodyStmt);
1727 if (Builder.saveIP().isSet())
1728 Builder.CreateBr(FiniBB);
1731 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1732 if (CGM.getLangOpts().OpenMPIRBuilder) {
1733 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1734 // Check if we have any if clause associated with the directive.
1735 llvm::Value *IfCond = nullptr;
1736 if (const auto *C = S.getSingleClause<OMPIfClause>())
1737 IfCond = EmitScalarExpr(C->getCondition(),
1738 /*IgnoreResultAssign=*/true);
1740 llvm::Value *NumThreads = nullptr;
1741 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1742 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1743 /*IgnoreResultAssign=*/true);
1745 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1746 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1747 ProcBind = ProcBindClause->getProcBindKind();
1749 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1751 // The cleanup callback that finalizes all variabels at the given location,
1752 // thus calls destructors etc.
1753 auto FiniCB = [this](InsertPointTy IP) {
1754 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1757 // Privatization callback that performs appropriate action for
1758 // shared/private/firstprivate/lastprivate/copyin/... variables.
1760 // TODO: This defaults to shared right now.
1761 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1762 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1763 // The next line is appropriate only for variables (Val) with the
1764 // data-sharing attribute "shared".
1765 ReplVal = &Val;
1767 return CodeGenIP;
1770 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1771 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1773 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
1774 InsertPointTy CodeGenIP) {
1775 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1776 *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
1779 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1780 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1781 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1782 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1783 Builder.restoreIP(
1784 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1785 IfCond, NumThreads, ProcBind, S.hasCancel()));
1786 return;
1789 // Emit parallel region as a standalone region.
1790 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1791 Action.Enter(CGF);
1792 OMPPrivateScope PrivateScope(CGF);
1793 emitOMPCopyinClause(CGF, S);
1794 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1795 CGF.EmitOMPPrivateClause(S, PrivateScope);
1796 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1797 (void)PrivateScope.Privatize();
1798 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1799 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1802 auto LPCRegion =
1803 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1804 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1805 emitEmptyBoundParameters);
1806 emitPostUpdateForReductionClause(*this, S,
1807 [](CodeGenFunction &) { return nullptr; });
1809 // Check for outer lastprivate conditional update.
1810 checkForLastprivateConditionalUpdate(*this, S);
1813 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1814 EmitStmt(S.getIfStmt());
1817 namespace {
1818 /// RAII to handle scopes for loop transformation directives.
1819 class OMPTransformDirectiveScopeRAII {
1820 OMPLoopScope *Scope = nullptr;
1821 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1822 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1824 public:
1825 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1826 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1827 Scope = new OMPLoopScope(CGF, *Dir);
1828 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1829 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1832 ~OMPTransformDirectiveScopeRAII() {
1833 if (!Scope)
1834 return;
1835 delete CapInfoRAII;
1836 delete CGSI;
1837 delete Scope;
1840 } // namespace
1842 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1843 int MaxLevel, int Level = 0) {
1844 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1845 const Stmt *SimplifiedS = S->IgnoreContainers();
1846 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1847 PrettyStackTraceLoc CrashInfo(
1848 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1849 "LLVM IR generation of compound statement ('{}')");
1851 // Keep track of the current cleanup stack depth, including debug scopes.
1852 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1853 for (const Stmt *CurStmt : CS->body())
1854 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1855 return;
1857 if (SimplifiedS == NextLoop) {
1858 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
1859 SimplifiedS = Dir->getTransformedStmt();
1860 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1861 SimplifiedS = CanonLoop->getLoopStmt();
1862 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1863 S = For->getBody();
1864 } else {
1865 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1866 "Expected canonical for loop or range-based for loop.");
1867 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1868 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1869 S = CXXFor->getBody();
1871 if (Level + 1 < MaxLevel) {
1872 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1873 S, /*TryImperfectlyNestedLoops=*/true);
1874 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1875 return;
1878 CGF.EmitStmt(S);
1881 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1882 JumpDest LoopExit) {
1883 RunCleanupsScope BodyScope(*this);
1884 // Update counters values on current iteration.
1885 for (const Expr *UE : D.updates())
1886 EmitIgnoredExpr(UE);
1887 // Update the linear variables.
1888 // In distribute directives only loop counters may be marked as linear, no
1889 // need to generate the code for them.
1890 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1891 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1892 for (const Expr *UE : C->updates())
1893 EmitIgnoredExpr(UE);
1897 // On a continue in the body, jump to the end.
1898 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1899 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1900 for (const Expr *E : D.finals_conditions()) {
1901 if (!E)
1902 continue;
1903 // Check that loop counter in non-rectangular nest fits into the iteration
1904 // space.
1905 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1906 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1907 getProfileCount(D.getBody()));
1908 EmitBlock(NextBB);
1911 OMPPrivateScope InscanScope(*this);
1912 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1913 bool IsInscanRegion = InscanScope.Privatize();
1914 if (IsInscanRegion) {
1915 // Need to remember the block before and after scan directive
1916 // to dispatch them correctly depending on the clause used in
1917 // this directive, inclusive or exclusive. For inclusive scan the natural
1918 // order of the blocks is used, for exclusive clause the blocks must be
1919 // executed in reverse order.
1920 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1921 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1922 // No need to allocate inscan exit block, in simd mode it is selected in the
1923 // codegen for the scan directive.
1924 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1925 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1926 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1927 EmitBranch(OMPScanDispatch);
1928 EmitBlock(OMPBeforeScanBlock);
1931 // Emit loop variables for C++ range loops.
1932 const Stmt *Body =
1933 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1934 // Emit loop body.
1935 emitBody(*this, Body,
1936 OMPLoopBasedDirective::tryToFindNextInnerLoop(
1937 Body, /*TryImperfectlyNestedLoops=*/true),
1938 D.getLoopsNumber());
1940 // Jump to the dispatcher at the end of the loop body.
1941 if (IsInscanRegion)
1942 EmitBranch(OMPScanExitBlock);
1944 // The end (updates/cleanups).
1945 EmitBlock(Continue.getBlock());
1946 BreakContinueStack.pop_back();
1949 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1951 /// Emit a captured statement and return the function as well as its captured
1952 /// closure context.
1953 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1954 const CapturedStmt *S) {
1955 LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1956 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1957 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1958 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1959 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1960 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1962 return {F, CapStruct.getPointer(ParentCGF)};
1965 /// Emit a call to a previously captured closure.
1966 static llvm::CallInst *
1967 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1968 llvm::ArrayRef<llvm::Value *> Args) {
1969 // Append the closure context to the argument.
1970 SmallVector<llvm::Value *> EffectiveArgs;
1971 EffectiveArgs.reserve(Args.size() + 1);
1972 llvm::append_range(EffectiveArgs, Args);
1973 EffectiveArgs.push_back(Cap.second);
1975 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1978 llvm::CanonicalLoopInfo *
1979 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1980 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1982 // The caller is processing the loop-associated directive processing the \p
1983 // Depth loops nested in \p S. Put the previous pending loop-associated
1984 // directive to the stack. If the current loop-associated directive is a loop
1985 // transformation directive, it will push its generated loops onto the stack
1986 // such that together with the loops left here they form the combined loop
1987 // nest for the parent loop-associated directive.
1988 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
1989 ExpectedOMPLoopDepth = Depth;
1991 EmitStmt(S);
1992 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1994 // The last added loop is the outermost one.
1995 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
1997 // Pop the \p Depth loops requested by the call from that stack and restore
1998 // the previous context.
1999 OMPLoopNestStack.pop_back_n(Depth);
2000 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2002 return Result;
2005 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2006 const Stmt *SyntacticalLoop = S->getLoopStmt();
2007 if (!getLangOpts().OpenMPIRBuilder) {
2008 // Ignore if OpenMPIRBuilder is not enabled.
2009 EmitStmt(SyntacticalLoop);
2010 return;
2013 LexicalScope ForScope(*this, S->getSourceRange());
2015 // Emit init statements. The Distance/LoopVar funcs may reference variable
2016 // declarations they contain.
2017 const Stmt *BodyStmt;
2018 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
2019 if (const Stmt *InitStmt = For->getInit())
2020 EmitStmt(InitStmt);
2021 BodyStmt = For->getBody();
2022 } else if (const auto *RangeFor =
2023 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
2024 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2025 EmitStmt(RangeStmt);
2026 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2027 EmitStmt(BeginStmt);
2028 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2029 EmitStmt(EndStmt);
2030 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2031 EmitStmt(LoopVarStmt);
2032 BodyStmt = RangeFor->getBody();
2033 } else
2034 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2036 // Emit closure for later use. By-value captures will be captured here.
2037 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2038 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
2039 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2040 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
2042 // Call the distance function to get the number of iterations of the loop to
2043 // come.
2044 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2045 ->getParam(0)
2046 ->getType()
2047 .getNonReferenceType();
2048 Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
2049 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2050 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
2052 // Emit the loop structure.
2053 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2054 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2055 llvm::Value *IndVar) {
2056 Builder.restoreIP(CodeGenIP);
2058 // Emit the loop body: Convert the logical iteration number to the loop
2059 // variable and emit the body.
2060 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2061 LValue LCVal = EmitLValue(LoopVarRef);
2062 Address LoopVarAddress = LCVal.getAddress(*this);
2063 emitCapturedStmtCall(*this, LoopVarClosure,
2064 {LoopVarAddress.getPointer(), IndVar});
2066 RunCleanupsScope BodyScope(*this);
2067 EmitStmt(BodyStmt);
2069 llvm::CanonicalLoopInfo *CL =
2070 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2072 // Finish up the loop.
2073 Builder.restoreIP(CL->getAfterIP());
2074 ForScope.ForceCleanup();
2076 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2077 OMPLoopNestStack.push_back(CL);
2080 void CodeGenFunction::EmitOMPInnerLoop(
2081 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2082 const Expr *IncExpr,
2083 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2084 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2085 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2087 // Start the loop with a block that tests the condition.
2088 auto CondBlock = createBasicBlock("omp.inner.for.cond");
2089 EmitBlock(CondBlock);
2090 const SourceRange R = S.getSourceRange();
2092 // If attributes are attached, push to the basic block with them.
2093 const auto &OMPED = cast<OMPExecutableDirective>(S);
2094 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2095 const Stmt *SS = ICS->getCapturedStmt();
2096 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2097 OMPLoopNestStack.clear();
2098 if (AS)
2099 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2100 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2101 SourceLocToDebugLoc(R.getEnd()));
2102 else
2103 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2104 SourceLocToDebugLoc(R.getEnd()));
2106 // If there are any cleanups between here and the loop-exit scope,
2107 // create a block to stage a loop exit along.
2108 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2109 if (RequiresCleanup)
2110 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2112 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2114 // Emit condition.
2115 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2116 if (ExitBlock != LoopExit.getBlock()) {
2117 EmitBlock(ExitBlock);
2118 EmitBranchThroughCleanup(LoopExit);
2121 EmitBlock(LoopBody);
2122 incrementProfileCounter(&S);
2124 // Create a block for the increment.
2125 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2126 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2128 BodyGen(*this);
2130 // Emit "IV = IV + 1" and a back-edge to the condition block.
2131 EmitBlock(Continue.getBlock());
2132 EmitIgnoredExpr(IncExpr);
2133 PostIncGen(*this);
2134 BreakContinueStack.pop_back();
2135 EmitBranch(CondBlock);
2136 LoopStack.pop();
2137 // Emit the fall-through block.
2138 EmitBlock(LoopExit.getBlock());
2141 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2142 if (!HaveInsertPoint())
2143 return false;
2144 // Emit inits for the linear variables.
2145 bool HasLinears = false;
2146 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2147 for (const Expr *Init : C->inits()) {
2148 HasLinears = true;
2149 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2150 if (const auto *Ref =
2151 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2152 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2153 const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2154 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2155 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2156 VD->getInit()->getType(), VK_LValue,
2157 VD->getInit()->getExprLoc());
2158 EmitExprAsInit(
2159 &DRE, VD,
2160 MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
2161 /*capturedByInit=*/false);
2162 EmitAutoVarCleanups(Emission);
2163 } else {
2164 EmitVarDecl(*VD);
2167 // Emit the linear steps for the linear clauses.
2168 // If a step is not constant, it is pre-calculated before the loop.
2169 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2170 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2171 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2172 // Emit calculation of the linear step.
2173 EmitIgnoredExpr(CS);
2176 return HasLinears;
2179 void CodeGenFunction::EmitOMPLinearClauseFinal(
2180 const OMPLoopDirective &D,
2181 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2182 if (!HaveInsertPoint())
2183 return;
2184 llvm::BasicBlock *DoneBB = nullptr;
2185 // Emit the final values of the linear variables.
2186 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2187 auto IC = C->varlist_begin();
2188 for (const Expr *F : C->finals()) {
2189 if (!DoneBB) {
2190 if (llvm::Value *Cond = CondGen(*this)) {
2191 // If the first post-update expression is found, emit conditional
2192 // block if it was requested.
2193 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2194 DoneBB = createBasicBlock(".omp.linear.pu.done");
2195 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2196 EmitBlock(ThenBB);
2199 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2200 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2201 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2202 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2203 Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2204 CodeGenFunction::OMPPrivateScope VarScope(*this);
2205 VarScope.addPrivate(OrigVD, OrigAddr);
2206 (void)VarScope.Privatize();
2207 EmitIgnoredExpr(F);
2208 ++IC;
2210 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2211 EmitIgnoredExpr(PostUpdate);
2213 if (DoneBB)
2214 EmitBlock(DoneBB, /*IsFinished=*/true);
2217 static void emitAlignedClause(CodeGenFunction &CGF,
2218 const OMPExecutableDirective &D) {
2219 if (!CGF.HaveInsertPoint())
2220 return;
2221 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2222 llvm::APInt ClauseAlignment(64, 0);
2223 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2224 auto *AlignmentCI =
2225 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2226 ClauseAlignment = AlignmentCI->getValue();
2228 for (const Expr *E : Clause->varlists()) {
2229 llvm::APInt Alignment(ClauseAlignment);
2230 if (Alignment == 0) {
2231 // OpenMP [2.8.1, Description]
2232 // If no optional parameter is specified, implementation-defined default
2233 // alignments for SIMD instructions on the target platforms are assumed.
2234 Alignment =
2235 CGF.getContext()
2236 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2237 E->getType()->getPointeeType()))
2238 .getQuantity();
2240 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2241 "alignment is not power of 2");
2242 if (Alignment != 0) {
2243 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2244 CGF.emitAlignmentAssumption(
2245 PtrValue, E, /*No second loc needed*/ SourceLocation(),
2246 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2252 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2253 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2254 if (!HaveInsertPoint())
2255 return;
2256 auto I = S.private_counters().begin();
2257 for (const Expr *E : S.counters()) {
2258 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2259 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2260 // Emit var without initialization.
2261 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2262 EmitAutoVarCleanups(VarEmission);
2263 LocalDeclMap.erase(PrivateVD);
2264 (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
2265 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2266 VD->hasGlobalStorage()) {
2267 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2268 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2269 E->getType(), VK_LValue, E->getExprLoc());
2270 (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this));
2271 } else {
2272 (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
2274 ++I;
2276 // Privatize extra loop counters used in loops for ordered(n) clauses.
2277 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2278 if (!C->getNumForLoops())
2279 continue;
2280 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2281 I < E; ++I) {
2282 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2283 const auto *VD = cast<VarDecl>(DRE->getDecl());
2284 // Override only those variables that can be captured to avoid re-emission
2285 // of the variables declared within the loops.
2286 if (DRE->refersToEnclosingVariableOrCapture()) {
2287 (void)LoopScope.addPrivate(
2288 VD, CreateMemTemp(DRE->getType(), VD->getName()));
2294 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2295 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2296 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2297 if (!CGF.HaveInsertPoint())
2298 return;
2300 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2301 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2302 (void)PreCondScope.Privatize();
2303 // Get initial values of real counters.
2304 for (const Expr *I : S.inits()) {
2305 CGF.EmitIgnoredExpr(I);
2308 // Create temp loop control variables with their init values to support
2309 // non-rectangular loops.
2310 CodeGenFunction::OMPMapVars PreCondVars;
2311 for (const Expr *E : S.dependent_counters()) {
2312 if (!E)
2313 continue;
2314 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2315 "dependent counter must not be an iterator.");
2316 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2317 Address CounterAddr =
2318 CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2319 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2321 (void)PreCondVars.apply(CGF);
2322 for (const Expr *E : S.dependent_inits()) {
2323 if (!E)
2324 continue;
2325 CGF.EmitIgnoredExpr(E);
2327 // Check that loop is executed at least one time.
2328 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2329 PreCondVars.restore(CGF);
2332 void CodeGenFunction::EmitOMPLinearClause(
2333 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2334 if (!HaveInsertPoint())
2335 return;
2336 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2337 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2338 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2339 for (const Expr *C : LoopDirective->counters()) {
2340 SIMDLCVs.insert(
2341 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2344 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2345 auto CurPrivate = C->privates().begin();
2346 for (const Expr *E : C->varlists()) {
2347 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2348 const auto *PrivateVD =
2349 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2350 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2351 // Emit private VarDecl with copy init.
2352 EmitVarDecl(*PrivateVD);
2353 bool IsRegistered =
2354 PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
2355 assert(IsRegistered && "linear var already registered as private");
2356 // Silence the warning about unused variable.
2357 (void)IsRegistered;
2358 } else {
2359 EmitVarDecl(*PrivateVD);
2361 ++CurPrivate;
2366 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2367 const OMPExecutableDirective &D) {
2368 if (!CGF.HaveInsertPoint())
2369 return;
2370 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2371 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2372 /*ignoreResult=*/true);
2373 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2374 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2375 // In presence of finite 'safelen', it may be unsafe to mark all
2376 // the memory instructions parallel, because loop-carried
2377 // dependences of 'safelen' iterations are possible.
2378 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2379 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2380 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2381 /*ignoreResult=*/true);
2382 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2383 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2384 // In presence of finite 'safelen', it may be unsafe to mark all
2385 // the memory instructions parallel, because loop-carried
2386 // dependences of 'safelen' iterations are possible.
2387 CGF.LoopStack.setParallel(/*Enable=*/false);
2391 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2392 // Walk clauses and process safelen/lastprivate.
2393 LoopStack.setParallel(/*Enable=*/true);
2394 LoopStack.setVectorizeEnable();
2395 emitSimdlenSafelenClause(*this, D);
2396 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2397 if (C->getKind() == OMPC_ORDER_concurrent)
2398 LoopStack.setParallel(/*Enable=*/true);
2399 if ((D.getDirectiveKind() == OMPD_simd ||
2400 (getLangOpts().OpenMPSimd &&
2401 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2402 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2403 [](const OMPReductionClause *C) {
2404 return C->getModifier() == OMPC_REDUCTION_inscan;
2406 // Disable parallel access in case of prefix sum.
2407 LoopStack.setParallel(/*Enable=*/false);
2410 void CodeGenFunction::EmitOMPSimdFinal(
2411 const OMPLoopDirective &D,
2412 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2413 if (!HaveInsertPoint())
2414 return;
2415 llvm::BasicBlock *DoneBB = nullptr;
2416 auto IC = D.counters().begin();
2417 auto IPC = D.private_counters().begin();
2418 for (const Expr *F : D.finals()) {
2419 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2420 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2421 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2422 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2423 OrigVD->hasGlobalStorage() || CED) {
2424 if (!DoneBB) {
2425 if (llvm::Value *Cond = CondGen(*this)) {
2426 // If the first post-update expression is found, emit conditional
2427 // block if it was requested.
2428 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2429 DoneBB = createBasicBlock(".omp.final.done");
2430 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2431 EmitBlock(ThenBB);
2434 Address OrigAddr = Address::invalid();
2435 if (CED) {
2436 OrigAddr =
2437 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2438 } else {
2439 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2440 /*RefersToEnclosingVariableOrCapture=*/false,
2441 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2442 OrigAddr = EmitLValue(&DRE).getAddress(*this);
2444 OMPPrivateScope VarScope(*this);
2445 VarScope.addPrivate(OrigVD, OrigAddr);
2446 (void)VarScope.Privatize();
2447 EmitIgnoredExpr(F);
2449 ++IC;
2450 ++IPC;
2452 if (DoneBB)
2453 EmitBlock(DoneBB, /*IsFinished=*/true);
2456 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2457 const OMPLoopDirective &S,
2458 CodeGenFunction::JumpDest LoopExit) {
2459 CGF.EmitOMPLoopBody(S, LoopExit);
2460 CGF.EmitStopPoint(&S);
2463 /// Emit a helper variable and return corresponding lvalue.
2464 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2465 const DeclRefExpr *Helper) {
2466 auto VDecl = cast<VarDecl>(Helper->getDecl());
2467 CGF.EmitVarDecl(*VDecl);
2468 return CGF.EmitLValue(Helper);
2471 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2472 const RegionCodeGenTy &SimdInitGen,
2473 const RegionCodeGenTy &BodyCodeGen) {
2474 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2475 PrePostActionTy &) {
2476 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2477 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2478 SimdInitGen(CGF);
2480 BodyCodeGen(CGF);
2482 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2483 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2484 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2486 BodyCodeGen(CGF);
2488 const Expr *IfCond = nullptr;
2489 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2490 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2491 if (CGF.getLangOpts().OpenMP >= 50 &&
2492 (C->getNameModifier() == OMPD_unknown ||
2493 C->getNameModifier() == OMPD_simd)) {
2494 IfCond = C->getCondition();
2495 break;
2499 if (IfCond) {
2500 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2501 } else {
2502 RegionCodeGenTy ThenRCG(ThenGen);
2503 ThenRCG(CGF);
/// Emit the code for the simd region of loop directive \p S.
///
/// After entering \p Action this emits, in order: the bound helper variables
/// (combined distribute/worksharing/taskloop directives only), the loop
/// precondition check, the iteration variable and iteration count, the
/// aligned/linear/private/reduction/lastprivate clause setup, the loop itself
/// through emitCommonSimdLoop, and the final updates for counters,
/// lastprivate, reduction and linear variables.
///
/// Nothing further is emitted when the precondition constant-folds to false.
2507 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2508 PrePostActionTy &Action) {
2509 Action.Enter(CGF);
2510 assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2511 "Expected simd directive");
2512 OMPLoopScope PreInitScope(CGF, S);
// Shape of the emitted code:
2513 // if (PreCond) {
2514 // for (IV in 0..LastIteration) BODY;
2515 // <Final counter/linear vars updates>;
2516 // }
// Directives combined with distribute, worksharing or taskloop also get
// their lower/upper bound helper variables emitted.
2518 if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2519 isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2520 isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2521 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2522 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2525 // Emit: if (PreCond) - begin.
2526 // If the condition constant folds and can be elided, avoid emitting the
2527 // whole loop.
2528 bool CondConstant;
// ContBlock stays null when the precondition folds to a constant; it is only
// created for a run-time check and is branched to at the end of the region.
2529 llvm::BasicBlock *ContBlock = nullptr;
2530 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2531 if (!CondConstant)
2532 return;
2533 } else {
2534 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2535 ContBlock = CGF.createBasicBlock("simd.if.end");
2536 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2537 CGF.getProfileCount(&S));
2538 CGF.EmitBlock(ThenBlock);
2539 CGF.incrementProfileCounter(&S);
2542 // Emit the loop iteration variable.
2543 const Expr *IVExpr = S.getIterationVariable();
2544 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2545 CGF.EmitVarDecl(*IVDecl);
2546 CGF.EmitIgnoredExpr(S.getInit());
2548 // Emit the iterations count variable.
2549 // If it is not a variable, Sema decided to calculate iterations count on
2550 // each iteration (e.g., it is foldable into a constant).
2551 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2552 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2553 // Emit calculation of the iterations count.
2554 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2557 emitAlignedClause(CGF, S);
2558 (void)CGF.EmitOMPLinearClauseInit(S);
// Register the private copies required by the clauses in one scope;
// Privatize() below installs them before the loop is emitted.
2560 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2561 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2562 CGF.EmitOMPLinearClause(S, LoopScope);
2563 CGF.EmitOMPPrivateClause(S, LoopScope);
2564 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2565 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2566 CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2567 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2568 (void)LoopScope.Privatize();
2569 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2570 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
// Emit the loop: the first callback performs the simd initialization, the
// second one emits the inner loop with the directive's condition/increment.
2572 emitCommonSimdLoop(
2573 CGF, S,
2574 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2575 CGF.EmitOMPSimdInit(S);
2577 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2578 CGF.EmitOMPInnerLoop(
2579 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2580 [&S](CodeGenFunction &CGF) {
2581 emitOMPLoopBodyWithStopPoint(CGF, S,
2582 CodeGenFunction::JumpDest());
2584 [](CodeGenFunction &) {});
// The condition generators below return nullptr, i.e. no guard value is
// supplied for the final updates.
2586 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2587 // Emit final copy of the lastprivate variables at the end of loops.
2588 if (HasLastprivateClause)
2589 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2590 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2591 emitPostUpdateForReductionClause(CGF, S,
2592 [](CodeGenFunction &) { return nullptr; });
// NOTE(review): the private mappings are restored before the linear finals
// are emitted, presumably so that those updates address the original
// variables - confirm against EmitOMPLinearClauseFinal.
2593 LoopScope.restoreMap();
2594 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2596 // Emit: if (PreCond) - end.
2597 if (ContBlock) {
2598 CGF.EmitBranch(ContBlock);
2599 CGF.EmitBlock(ContBlock, true);
2603 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2604 // Check for unsupported clauses
2605 for (OMPClause *C : S.clauses()) {
2606 // Currently only order, simdlen and safelen clauses are supported
2607 if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2608 isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2609 return false;
2612 // Check if we have a statement with the ordered directive.
2613 // Visit the statement hierarchy to find a compound statement
2614 // with a ordered directive in it.
2615 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2616 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2617 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2618 if (!SubStmt)
2619 continue;
2620 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2621 for (const Stmt *CSSubStmt : CS->children()) {
2622 if (!CSSubStmt)
2623 continue;
2624 if (isa<OMPOrderedDirective>(CSSubStmt)) {
2625 return false;
2632 return true;
2634 static llvm::MapVector<llvm::Value *, llvm::Value *>
2635 GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
2636 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2637 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2638 llvm::APInt ClauseAlignment(64, 0);
2639 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2640 auto *AlignmentCI =
2641 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2642 ClauseAlignment = AlignmentCI->getValue();
2644 for (const Expr *E : Clause->varlists()) {
2645 llvm::APInt Alignment(ClauseAlignment);
2646 if (Alignment == 0) {
2647 // OpenMP [2.8.1, Description]
2648 // If no optional parameter is specified, implementation-defined default
2649 // alignments for SIMD instructions on the target platforms are assumed.
2650 Alignment =
2651 CGF.getContext()
2652 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2653 E->getType()->getPointeeType()))
2654 .getQuantity();
2656 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2657 "alignment is not power of 2");
2658 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2659 AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
2662 return AlignedVars;
2665 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2666 bool UseOMPIRBuilder =
2667 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
2668 if (UseOMPIRBuilder) {
2669 auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
2670 PrePostActionTy &) {
2671 // Use the OpenMPIRBuilder if enabled.
2672 if (UseOMPIRBuilder) {
2673 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2674 GetAlignedMapping(S, CGF);
2675 // Emit the associated statement and get its loop representation.
2676 const Stmt *Inner = S.getRawStmt();
2677 llvm::CanonicalLoopInfo *CLI =
2678 EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2680 llvm::OpenMPIRBuilder &OMPBuilder =
2681 CGM.getOpenMPRuntime().getOMPBuilder();
2682 // Add SIMD specific metadata
2683 llvm::ConstantInt *Simdlen = nullptr;
2684 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
2685 RValue Len =
2686 this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2687 /*ignoreResult=*/true);
2688 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2689 Simdlen = Val;
2691 llvm::ConstantInt *Safelen = nullptr;
2692 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
2693 RValue Len =
2694 this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2695 /*ignoreResult=*/true);
2696 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2697 Safelen = Val;
2699 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
2700 if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2701 if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
2702 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
2705 // Add simd metadata to the collapsed loop. Do not generate
2706 // another loop for if clause. Support for if clause is done earlier.
2707 OMPBuilder.applySimd(CLI, AlignedVars,
2708 /*IfCond*/ nullptr, Order, Simdlen, Safelen);
2709 return;
2713 auto LPCRegion =
2714 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2715 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2716 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
2717 CodeGenIRBuilder);
2719 return;
2722 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2723 OMPFirstScanLoop = true;
2724 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2725 emitOMPSimdRegion(CGF, S, Action);
2728 auto LPCRegion =
2729 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2730 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2731 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2733 // Check for outer lastprivate conditional update.
2734 checkForLastprivateConditionalUpdate(*this, S);
2737 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2738 // Emit the de-sugared statement.
2739 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2740 EmitStmt(S.getTransformedStmt());
2743 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2744 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2746 if (UseOMPIRBuilder) {
2747 auto DL = SourceLocToDebugLoc(S.getBeginLoc());
2748 const Stmt *Inner = S.getRawStmt();
2750 // Consume nested loop. Clear the entire remaining loop stack because a
2751 // fully unrolled loop is non-transformable. For partial unrolling the
2752 // generated outer loop is pushed back to the stack.
2753 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2754 OMPLoopNestStack.clear();
2756 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2758 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2759 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2761 if (S.hasClausesOfKind<OMPFullClause>()) {
2762 assert(ExpectedOMPLoopDepth == 0);
2763 OMPBuilder.unrollLoopFull(DL, CLI);
2764 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2765 uint64_t Factor = 0;
2766 if (Expr *FactorExpr = PartialClause->getFactor()) {
2767 Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2768 assert(Factor >= 1 && "Only positive factors are valid");
2770 OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
2771 NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2772 } else {
2773 OMPBuilder.unrollLoopHeuristic(DL, CLI);
2776 assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2777 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2778 if (UnrolledCLI)
2779 OMPLoopNestStack.push_back(UnrolledCLI);
2781 return;
2784 // This function is only called if the unrolled loop is not consumed by any
2785 // other loop-associated construct. Such a loop-associated construct will have
2786 // used the transformed AST.
2788 // Set the unroll metadata for the next emitted loop.
2789 LoopStack.setUnrollState(LoopAttributes::Enable);
2791 if (S.hasClausesOfKind<OMPFullClause>()) {
2792 LoopStack.setUnrollState(LoopAttributes::Full);
2793 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2794 if (Expr *FactorExpr = PartialClause->getFactor()) {
2795 uint64_t Factor =
2796 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2797 assert(Factor >= 1 && "Only positive factors are valid");
2798 LoopStack.setUnrollCount(Factor);
2802 EmitStmt(S.getAssociatedStmt());
/// Emit the outer ("dispatch") loop of loop directive \p S: a loop whose
/// condition block obtains the next range of iterations and whose body runs
/// the inner loop over that range.
///
/// \param DynamicOrOrdered If true, the loop condition is the result of
/// RT.emitForNext and "IV = LB" is emitted at the start of the loop body. If
/// false, the EUB, Init and Cond expressions of \p LoopArgs are emitted in
/// the condition block, NextLB/NextUB are emitted in the increment block and
/// emitForStaticFinish is emitted after the loop.
/// \param IsMonotonic For non-simd directives the loop is marked parallel
/// when this is false (an 'order(concurrent)' clause marks it parallel
/// regardless).
/// \param LoopScope Private scope of the loop; it is queried for required
/// cleanups.
/// \param LoopArgs Bound, stride and expression arguments of the loop.
/// \param CodeGenLoop Callback that emits the loop body.
/// \param CodeGenOrdered Callback invoked from the second callback handed to
/// EmitOMPInnerLoop, with the directive's begin location and the size and
/// signedness of the iteration variable.
2805 void CodeGenFunction::EmitOMPOuterLoop(
2806 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2807 CodeGenFunction::OMPPrivateScope &LoopScope,
2808 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2809 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2810 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2811 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
// Bit width and signedness of the iteration variable; handed to the runtime
// entry points and to CodeGenOrdered.
2813 const Expr *IVExpr = S.getIterationVariable();
2814 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2815 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2817 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2819 // Start the loop with a block that tests the condition.
2820 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2821 EmitBlock(CondBlock);
2822 const SourceRange R = S.getSourceRange();
2823 OMPLoopNestStack.clear();
2824 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2825 SourceLocToDebugLoc(R.getEnd()));
2827 llvm::Value *BoolCondVal = nullptr;
2828 if (!DynamicOrOrdered) {
2829 // UB = min(UB, GlobalUB) or
2830 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2831 // 'distribute parallel for')
2832 EmitIgnoredExpr(LoopArgs.EUB);
2833 // IV = LB
2834 EmitIgnoredExpr(LoopArgs.Init);
2835 // IV < UB
2836 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2837 } else {
// The condition is the value returned by the runtime's "next" call, which
// is also given the IL/LB/UB/ST addresses.
2838 BoolCondVal =
2839 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2840 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2843 // If there are any cleanups between here and the loop-exit scope,
2844 // create a block to stage a loop exit along.
2845 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2846 if (LoopScope.requiresCleanups())
2847 ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2849 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2850 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2851 if (ExitBlock != LoopExit.getBlock()) {
2852 EmitBlock(ExitBlock);
2853 EmitBranchThroughCleanup(LoopExit);
2855 EmitBlock(LoopBody);
2857 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2858 // LB for loop condition and emitted it above).
2859 if (DynamicOrOrdered)
2860 EmitIgnoredExpr(LoopArgs.Init);
2862 // Create a block for the increment.
2863 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2864 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
// Emit the inner loop. The first callback sets up the loop attributes, the
// second one emits the inner loop itself.
2866 emitCommonSimdLoop(
2867 *this, S,
2868 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2869 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2870 // with dynamic/guided scheduling and without ordered clause.
2871 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2872 CGF.LoopStack.setParallel(!IsMonotonic);
2873 if (const auto *C = S.getSingleClause<OMPOrderClause>())
2874 if (C->getKind() == OMPC_ORDER_concurrent)
2875 CGF.LoopStack.setParallel(/*Enable=*/true);
2876 } else {
2877 CGF.EmitOMPSimdInit(S);
2880 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2881 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2882 SourceLocation Loc = S.getBeginLoc();
2883 // when 'distribute' is not combined with a 'for':
2884 // while (idx <= UB) { BODY; ++idx; }
2885 // when 'distribute' is combined with a 'for'
2886 // (e.g. 'distribute parallel for')
2887 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2888 CGF.EmitOMPInnerLoop(
2889 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2890 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2891 CodeGenLoop(CGF, S, LoopExit);
2893 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2894 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2898 EmitBlock(Continue.getBlock());
2899 BreakContinueStack.pop_back();
2900 if (!DynamicOrOrdered) {
2901 // Emit "LB = LB + Stride", "UB = UB + Stride".
2902 EmitIgnoredExpr(LoopArgs.NextLB);
2903 EmitIgnoredExpr(LoopArgs.NextUB);
// Branch back to the condition block and close the loop.
2906 EmitBranch(CondBlock);
2907 OMPLoopNestStack.clear();
2908 LoopStack.pop();
2909 // Emit the fall-through block.
2910 EmitBlock(LoopExit.getBlock());
2912 // Tell the runtime we are done.
// Only the non-dynamic, non-ordered case emits the static-finish call here.
2913 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2914 if (!DynamicOrOrdered)
2915 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2916 S.getDirectiveKind());
2918 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2921 void CodeGenFunction::EmitOMPForOuterLoop(
2922 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2923 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2924 const OMPLoopArguments &LoopArgs,
2925 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2926 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2928 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2929 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
2931 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2932 LoopArgs.Chunk != nullptr)) &&
2933 "static non-chunked schedule does not need outer loop");
2935 // Emit outer loop.
2937 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2938 // When schedule(dynamic,chunk_size) is specified, the iterations are
2939 // distributed to threads in the team in chunks as the threads request them.
2940 // Each thread executes a chunk of iterations, then requests another chunk,
2941 // until no chunks remain to be distributed. Each chunk contains chunk_size
2942 // iterations, except for the last chunk to be distributed, which may have
2943 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2945 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2946 // to threads in the team in chunks as the executing threads request them.
2947 // Each thread executes a chunk of iterations, then requests another chunk,
2948 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2949 // each chunk is proportional to the number of unassigned iterations divided
2950 // by the number of threads in the team, decreasing to 1. For a chunk_size
2951 // with value k (greater than 1), the size of each chunk is determined in the
2952 // same way, with the restriction that the chunks do not contain fewer than k
2953 // iterations (except for the last chunk to be assigned, which may have fewer
2954 // than k iterations).
2956 // When schedule(auto) is specified, the decision regarding scheduling is
2957 // delegated to the compiler and/or runtime system. The programmer gives the
2958 // implementation the freedom to choose any possible mapping of iterations to
2959 // threads in the team.
2961 // When schedule(runtime) is specified, the decision regarding scheduling is
2962 // deferred until run time, and the schedule and chunk size are taken from the
2963 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2964 // implementation defined
2966 // while(__kmpc_dispatch_next(&LB, &UB)) {
2967 // idx = LB;
2968 // while (idx <= UB) { BODY; ++idx;
2969 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2970 // } // inner loop
2971 // }
2973 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2974 // When schedule(static, chunk_size) is specified, iterations are divided into
2975 // chunks of size chunk_size, and the chunks are assigned to the threads in
2976 // the team in a round-robin fashion in the order of the thread number.
2978 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2979 // while (idx <= UB) { BODY; ++idx; } // inner loop
2980 // LB = LB + ST;
2981 // UB = UB + ST;
2982 // }
2985 const Expr *IVExpr = S.getIterationVariable();
2986 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2987 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2989 if (DynamicOrOrdered) {
2990 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2991 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2992 llvm::Value *LBVal = DispatchBounds.first;
2993 llvm::Value *UBVal = DispatchBounds.second;
2994 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2995 LoopArgs.Chunk};
2996 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2997 IVSigned, Ordered, DipatchRTInputValues);
2998 } else {
2999 CGOpenMPRuntime::StaticRTInput StaticInit(
3000 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3001 LoopArgs.ST, LoopArgs.Chunk);
3002 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
3003 ScheduleKind, StaticInit);
3006 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3007 const unsigned IVSize,
3008 const bool IVSigned) {
3009 if (Ordered) {
3010 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3011 IVSigned);
3015 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3016 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3017 OuterLoopArgs.IncExpr = S.getInc();
3018 OuterLoopArgs.Init = S.getInit();
3019 OuterLoopArgs.Cond = S.getCond();
3020 OuterLoopArgs.NextLB = S.getNextLowerBound();
3021 OuterLoopArgs.NextUB = S.getNextUpperBound();
3022 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
3023 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3026 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3027 const unsigned IVSize, const bool IVSigned) {}
3029 void CodeGenFunction::EmitOMPDistributeOuterLoop(
3030 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3031 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3032 const CodeGenLoopTy &CodeGenLoopContent) {
3034 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3036 // Emit outer loop.
3037 // Same behavior as a OMPForOuterLoop, except that schedule cannot be
3038 // dynamic
3041 const Expr *IVExpr = S.getIterationVariable();
3042 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3043 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3045 CGOpenMPRuntime::StaticRTInput StaticInit(
3046 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3047 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3048 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
3050 // for combined 'distribute' and 'for' the increment expression of distribute
3051 // is stored in DistInc. For 'distribute' alone, it is in Inc.
3052 Expr *IncExpr;
3053 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
3054 IncExpr = S.getDistInc();
3055 else
3056 IncExpr = S.getInc();
3058 // this routine is shared by 'omp distribute parallel for' and
3059 // 'omp distribute': select the right EUB expression depending on the
3060 // directive
3061 OMPLoopArguments OuterLoopArgs;
3062 OuterLoopArgs.LB = LoopArgs.LB;
3063 OuterLoopArgs.UB = LoopArgs.UB;
3064 OuterLoopArgs.ST = LoopArgs.ST;
3065 OuterLoopArgs.IL = LoopArgs.IL;
3066 OuterLoopArgs.Chunk = LoopArgs.Chunk;
3067 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3068 ? S.getCombinedEnsureUpperBound()
3069 : S.getEnsureUpperBound();
3070 OuterLoopArgs.IncExpr = IncExpr;
3071 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3072 ? S.getCombinedInit()
3073 : S.getInit();
3074 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3075 ? S.getCombinedCond()
3076 : S.getCond();
3077 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3078 ? S.getCombinedNextLowerBound()
3079 : S.getNextLowerBound();
3080 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3081 ? S.getCombinedNextUpperBound()
3082 : S.getNextUpperBound();
3084 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3085 LoopScope, OuterLoopArgs, CodeGenLoopContent,
3086 emitEmptyOrdered);
3089 static std::pair<LValue, LValue>
3090 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3091 const OMPExecutableDirective &S) {
3092 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3093 LValue LB =
3094 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3095 LValue UB =
3096 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3098 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3099 // parallel for') we need to use the 'distribute'
3100 // chunk lower and upper bounds rather than the whole loop iteration
3101 // space. These are parameters to the outlined function for 'parallel'
3102 // and we copy the bounds of the previous schedule into the
3103 // the current ones.
3104 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3105 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
3106 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3107 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
3108 PrevLBVal = CGF.EmitScalarConversion(
3109 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3110 LS.getIterationVariable()->getType(),
3111 LS.getPrevLowerBoundVariable()->getExprLoc());
3112 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3113 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3114 PrevUBVal = CGF.EmitScalarConversion(
3115 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3116 LS.getIterationVariable()->getType(),
3117 LS.getPrevUpperBoundVariable()->getExprLoc());
3119 CGF.EmitStoreOfScalar(PrevLBVal, LB);
3120 CGF.EmitStoreOfScalar(PrevUBVal, UB);
3122 return {LB, UB};
3125 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3126 /// we need to use the LB and UB expressions generated by the worksharing
3127 /// code generation support, whereas in non combined situations we would
3128 /// just emit 0 and the LastIteration expression
3129 /// This function is necessary due to the difference of the LB and UB
3130 /// types for the RT emission routines for 'for_static_init' and
3131 /// 'for_dispatch_init'
3132 static std::pair<llvm::Value *, llvm::Value *>
3133 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3134 const OMPExecutableDirective &S,
3135 Address LB, Address UB) {
3136 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3137 const Expr *IVExpr = LS.getIterationVariable();
3138 // when implementing a dynamic schedule for a 'for' combined with a
3139 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3140 // is not normalized as each team only executes its own assigned
3141 // distribute chunk
3142 QualType IteratorTy = IVExpr->getType();
3143 llvm::Value *LBVal =
3144 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3145 llvm::Value *UBVal =
3146 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3147 return {LBVal, UBVal};
3150 static void emitDistributeParallelForDistributeInnerBoundParams(
3151 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3152 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3153 const auto &Dir = cast<OMPLoopDirective>(S);
3154 LValue LB =
3155 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3156 llvm::Value *LBCast =
3157 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
3158 CGF.SizeTy, /*isSigned=*/false);
3159 CapturedVars.push_back(LBCast);
3160 LValue UB =
3161 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3163 llvm::Value *UBCast =
3164 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
3165 CGF.SizeTy, /*isSigned=*/false);
3166 CapturedVars.push_back(UBCast);
3169 static void
3170 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3171 const OMPLoopDirective &S,
3172 CodeGenFunction::JumpDest LoopExit) {
3173 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3174 PrePostActionTy &Action) {
3175 Action.Enter(CGF);
3176 bool HasCancel = false;
3177 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3178 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3179 HasCancel = D->hasCancel();
3180 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3181 HasCancel = D->hasCancel();
3182 else if (const auto *D =
3183 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3184 HasCancel = D->hasCancel();
3186 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3187 HasCancel);
3188 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3189 emitDistributeParallelForInnerBounds,
3190 emitDistributeParallelForDispatchBounds);
3193 emitCommonOMPParallelDirective(
3194 CGF, S,
3195 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
3196 CGInlinedWorksharingLoop,
3197 emitDistributeParallelForDistributeInnerBoundParams);
3200 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3201 const OMPDistributeParallelForDirective &S) {
3202 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3203 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3204 S.getDistInc());
3206 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3207 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3210 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3211 const OMPDistributeParallelForSimdDirective &S) {
3212 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3213 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3214 S.getDistInc());
3216 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3217 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3220 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3221 const OMPDistributeSimdDirective &S) {
3222 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3223 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3225 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3226 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
/// Emits the outlined target function for an OMPTargetSimdDirective.
/// \param CGM module whose OpenMP runtime performs the outlining.
/// \param ParentName forwarded unchanged to emitTargetOutlinedFunction.
/// \param S the 'target simd' directive whose region is emitted.
3229 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3230 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3231 // Region body generator: emits the simd region of this target simd directive.
3232 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3233 emitOMPSimdRegion(CGF, S, Action);
// Fn and Addr are declared uninitialized and asserted non-null after the call
// below; presumably emitTargetOutlinedFunction fills them in by reference —
// confirm against its signature.
3235 llvm::Function *Fn;
3236 llvm::Constant *Addr;
3237 // Emit target region as a standalone region.
3238 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3239 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3240 assert(Fn && Addr && "Target device function emission failed.");
/// Emits code for an OMPTargetSimdDirective by handing a region callback that
/// calls emitOMPSimdRegion to emitCommonOMPTargetDirective.
3243 void CodeGenFunction::EmitOMPTargetSimdDirective(
3244 const OMPTargetSimdDirective &S) {
3245 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3246 emitOMPSimdRegion(CGF, S, Action);
3248 emitCommonOMPTargetDirective(*this, S, CodeGen);
3251 namespace {
/// Plain aggregate bundling a schedule clause kind with two schedule clause
/// modifiers. The constructor only copies its arguments into the members.
3252 struct ScheduleKindModifiersTy {
// Schedule clause kind.
3253 OpenMPScheduleClauseKind Kind;
// First schedule modifier.
3254 OpenMPScheduleClauseModifier M1;
// Second schedule modifier.
3255 OpenMPScheduleClauseModifier M2;
3256 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3257 OpenMPScheduleClauseModifier M1,
3258 OpenMPScheduleClauseModifier M2)
3259 : Kind(Kind), M1(M1), M2(M2) {}
3261 } // namespace
/// Emits a worksharing loop for the loop directive \p S.
///
/// \param S the loop directive being emitted.
/// \param EUB expression stored in the OMPLoopArguments that are handed to
/// EmitOMPForOuterLoop (only used on the outer-loop path).
/// \param CodeGenLoopBounds callback invoked to obtain the (LB, UB) lvalue
/// pair for the loop.
/// \param CGDispatchBounds callback forwarded to EmitOMPForOuterLoop.
/// \returns false if the precondition constant-folds to false (nothing is
/// emitted for the loop); otherwise the value returned by
/// EmitOMPLastprivateClauseInit for \p S.
3263 bool CodeGenFunction::EmitOMPWorksharingLoop(
3264 const OMPLoopDirective &S, Expr *EUB,
3265 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3266 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3267 // Emit the loop iteration variable.
3268 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3269 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3270 EmitVarDecl(*IVDecl);
3272 // Emit the iterations count variable.
3273 // If it is not a variable, Sema decided to calculate iterations count on each
3274 // iteration (e.g., it is foldable into a constant).
3275 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3276 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3277 // Emit calculation of the iterations count.
3278 EmitIgnoredExpr(S.getCalcLastIteration());
3281 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3283 bool HasLastprivateClause;
3284 // Check pre-condition.
3286 OMPLoopScope PreInitScope(*this, S);
3287 // Skip the entire loop if we don't meet the precondition.
3288 // If the condition constant folds and can be elided, avoid emitting the
3289 // whole loop.
3290 bool CondConstant;
// ContBlock stays null when the precondition folds to a constant; it is only
// created (and later branched to) when a runtime precondition check is emitted.
3291 llvm::BasicBlock *ContBlock = nullptr;
3292 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3293 if (!CondConstant)
3294 return false;
3295 } else {
3296 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3297 ContBlock = createBasicBlock("omp.precond.end");
3298 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3299 getProfileCount(&S));
3300 EmitBlock(ThenBlock);
3301 incrementProfileCounter(&S);
3304 RunCleanupsScope DoacrossCleanupScope(*this);
// Ordered is set only for an 'ordered' clause whose getNumForLoops() is null;
// when it is non-null, doacross initialization is emitted instead.
3305 bool Ordered = false;
3306 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3307 if (OrderedClause->getNumForLoops())
3308 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3309 else
3310 Ordered = true;
// NOTE(review): EmittedFinals is declared here but not referenced anywhere
// else in this function; it looks like a leftover and could be removed.
3313 llvm::DenseSet<const Expr *> EmittedFinals;
3314 emitAlignedClause(*this, S);
3315 bool HasLinears = EmitOMPLinearClauseInit(S);
3316 // Emit helper vars inits.
3318 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3319 LValue LB = Bounds.first;
3320 LValue UB = Bounds.second;
3321 LValue ST =
3322 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3323 LValue IL =
3324 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3326 // Emit 'then' code.
3328 OMPPrivateScope LoopScope(*this);
3329 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3330 // Emit implicit barrier to synchronize threads and avoid data races on
3331 // initialization of firstprivate variables and post-update of
3332 // lastprivate variables.
3333 CGM.getOpenMPRuntime().emitBarrierCall(
3334 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3335 /*ForceSimpleCall=*/true);
3337 EmitOMPPrivateClause(S, LoopScope);
3338 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3339 *this, S, EmitLValue(S.getIterationVariable()));
3340 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3341 EmitOMPReductionClauseInit(S, LoopScope);
3342 EmitOMPPrivateLoopCounters(S, LoopScope);
3343 EmitOMPLinearClause(S, LoopScope);
3344 (void)LoopScope.Privatize();
3345 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3346 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3348 // Detect the loop schedule kind and chunk.
3349 const Expr *ChunkExpr = nullptr;
3350 OpenMPScheduleTy ScheduleKind;
3351 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3352 ScheduleKind.Schedule = C->getScheduleKind();
3353 ScheduleKind.M1 = C->getFirstScheduleModifier();
3354 ScheduleKind.M2 = C->getSecondScheduleModifier();
3355 ChunkExpr = C->getChunkSize();
3356 } else {
3357 // Default behaviour for schedule clause.
3358 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3359 *this, S, ScheduleKind.Schedule, ChunkExpr);
// HasChunkSizeOne becomes true only when the chunk expression evaluates to an
// integer constant equal to 1; the emitted Chunk value is first converted to
// the iteration variable's type.
3361 bool HasChunkSizeOne = false;
3362 llvm::Value *Chunk = nullptr;
3363 if (ChunkExpr) {
3364 Chunk = EmitScalarExpr(ChunkExpr);
3365 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3366 S.getIterationVariable()->getType(),
3367 S.getBeginLoc());
3368 Expr::EvalResult Result;
3369 if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3370 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3371 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3374 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3375 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3376 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3377 // If the static schedule kind is specified or if the ordered clause is
3378 // specified, and if no monotonic modifier is specified, the effect will
3379 // be as if the monotonic modifier was specified.
3380 bool StaticChunkedOne =
3381 RT.isStaticChunked(ScheduleKind.Schedule,
3382 /* Chunked */ Chunk != nullptr) &&
3383 HasChunkSizeOne &&
3384 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3385 bool IsMonotonic =
3386 Ordered ||
3387 (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3388 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3389 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3390 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3391 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Two code paths follow. A static non-chunked schedule, or a static schedule
// with constant chunk size one on a loop-bound-sharing directive, is emitted
// directly via emitForStaticInit as long as 'Ordered' is false. Everything
// else goes through EmitOMPForOuterLoop.
3392 if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3393 /* Chunked */ Chunk != nullptr) ||
3394 StaticChunkedOne) &&
3395 !Ordered) {
3396 JumpDest LoopExit =
3397 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3398 emitCommonSimdLoop(
3399 *this, S,
3400 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3401 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3402 CGF.EmitOMPSimdInit(S);
3403 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3404 if (C->getKind() == OMPC_ORDER_concurrent)
3405 CGF.LoopStack.setParallel(/*Enable=*/true);
3408 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3409 &S, ScheduleKind, LoopExit,
3410 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3411 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3412 // When no chunk_size is specified, the iteration space is divided
3413 // into chunks that are approximately equal in size, and at most
3414 // one chunk is distributed to each thread. Note that the size of
3415 // the chunks is unspecified in this case.
3416 CGOpenMPRuntime::StaticRTInput StaticInit(
3417 IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3418 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3419 StaticChunkedOne ? Chunk : nullptr);
3420 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3421 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3422 StaticInit);
3423 // UB = min(UB, GlobalUB);
3424 if (!StaticChunkedOne)
3425 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3426 // IV = LB;
3427 CGF.EmitIgnoredExpr(S.getInit());
3428 // For unchunked static schedule generate:
3430 // while (idx <= UB) {
3431 // BODY;
3432 // ++idx;
3433 // }
3435 // For static schedule with chunk one:
3437 // while (IV <= PrevUB) {
3438 // BODY;
3439 // IV += ST;
3440 // }
3441 CGF.EmitOMPInnerLoop(
3442 S, LoopScope.requiresCleanups(),
3443 StaticChunkedOne ? S.getCombinedParForInDistCond()
3444 : S.getCond(),
3445 StaticChunkedOne ? S.getDistInc() : S.getInc(),
3446 [&S, LoopExit](CodeGenFunction &CGF) {
3447 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3449 [](CodeGenFunction &) {});
3451 EmitBlock(LoopExit.getBlock());
3452 // Tell the runtime we are done.
3453 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3454 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3455 S.getDirectiveKind());
3457 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3458 } else {
3459 // Emit the outer loop, which requests its work chunk [LB..UB] from
3460 // runtime and runs the inner loop to process it.
3461 const OMPLoopArguments LoopArguments(
3462 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3463 IL.getAddress(*this), Chunk, EUB);
3464 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3465 LoopArguments, CGDispatchBounds);
// The finalization steps below all use the same condition: the loaded
// is-last-iteration helper variable (IL) is non-null.
3467 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3468 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3469 return CGF.Builder.CreateIsNotNull(
3470 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3473 EmitOMPReductionClauseFinal(
3474 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3475 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3476 : /*Parallel only*/ OMPD_parallel);
3477 // Emit post-update of the reduction variables if IsLastIter != 0.
3478 emitPostUpdateForReductionClause(
3479 *this, S, [IL, &S](CodeGenFunction &CGF) {
3480 return CGF.Builder.CreateIsNotNull(
3481 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3483 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3484 if (HasLastprivateClause)
3485 EmitOMPLastprivateClauseFinal(
3486 S, isOpenMPSimdDirective(S.getDirectiveKind()),
3487 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3488 LoopScope.restoreMap();
3489 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3490 return CGF.Builder.CreateIsNotNull(
3491 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3494 DoacrossCleanupScope.ForceCleanup();
3495 // We're now done with the loop, so jump to the continuation block.
3496 if (ContBlock) {
3497 EmitBranch(ContBlock);
3498 EmitBlock(ContBlock, /*IsFinished=*/true);
3501 return HasLastprivateClause;
3504 /// The following two functions generate expressions for the loop lower
3505 /// and upper bounds in case of static and dynamic (dispatch) schedule
3506 /// of the associated 'for' or 'distribute' loop.
///
/// This one casts \p S to OMPLoopDirective and returns the lvalues produced by
/// EmitOMPHelperVar for its lower-bound and upper-bound variables, in that
/// order.
3507 static std::pair<LValue, LValue>
3508 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3509 const auto &LS = cast<OMPLoopDirective>(S);
3510 LValue LB =
3511 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3512 LValue UB =
3513 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3514 return {LB, UB};
3517 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3518 /// consider the lower and upper bound expressions generated by the
3519 /// worksharing loop support, but we use 0 and the iteration space size as
3520 /// constants
///
/// \returns a pair whose first element is the integer constant 0 with the bit
/// width of the iteration variable's type, and whose second element is the
/// emitted value of the directive's getLastIteration() expression.
/// The \p LB and \p UB address parameters are not used in the body.
3521 static std::pair<llvm::Value *, llvm::Value *>
3522 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3523 Address LB, Address UB) {
3524 const auto &LS = cast<OMPLoopDirective>(S);
3525 const Expr *IVExpr = LS.getIterationVariable();
3526 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3527 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3528 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3529 return {LBVal, UBVal};
3532 /// Emits internal temp array declarations for the directive with inscan
3533 /// reductions.
3534 /// The code is the following:
3535 /// \code
3536 /// size num_iters = <num_iters>;
3537 /// <type> buffer[num_iters];
3538 /// \endcode
///
/// \param NumIteratorsGen callback producing the iteration count; its result
/// is cast to SizeTy as an unsigned value and used as the array dimension of
/// each copy-array temporary.
3539 static void emitScanBasedDirectiveDecls(
3540 CodeGenFunction &CGF, const OMPLoopDirective &S,
3541 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3542 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3543 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
// Gather the per-variable expression lists from every reduction clause on S;
// the four vectors are appended in lockstep.
3544 SmallVector<const Expr *, 4> Shareds;
3545 SmallVector<const Expr *, 4> Privates;
3546 SmallVector<const Expr *, 4> ReductionOps;
3547 SmallVector<const Expr *, 4> CopyArrayTemps;
3548 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3549 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3550 "Only inscan reductions are expected.");
3551 Shareds.append(C->varlist_begin(), C->varlist_end());
3552 Privates.append(C->privates().begin(), C->privates().end());
3553 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3554 CopyArrayTemps.append(C->copy_array_temps().begin(),
3555 C->copy_array_temps().end());
3558 // Emit buffers for each reduction variables.
3559 // ReductionCodeGen is required to emit correctly the code for array
3560 // reductions.
3561 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
// Count indexes into RedCG and ITA walks CopyArrayTemps; both advance together
// with the iteration over Privates.
3562 unsigned Count = 0;
3563 auto *ITA = CopyArrayTemps.begin();
3564 for (const Expr *IRef : Privates) {
3565 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3566 // Emit variably modified arrays, used for arrays/array sections
3567 // reductions.
3568 if (PrivateVD->getType()->isVariablyModifiedType()) {
3569 RedCG.emitSharedOrigLValue(CGF, Count);
3570 RedCG.emitAggregateType(CGF, Count);
// Bind the opaque size expression of the temp's variable array type to the
// iteration count while the temp buffer declaration is emitted.
3572 CodeGenFunction::OpaqueValueMapping DimMapping(
3573 CGF,
3574 cast<OpaqueValueExpr>(
3575 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3576 ->getSizeExpr()),
3577 RValue::get(OMPScanNumIterations));
3578 // Emit temp buffer.
3579 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3580 ++ITA;
3581 ++Count;
3586 /// Copies final inscan reductions values to the original variables.
3587 /// The code is the following:
3588 /// \code
3589 /// <orig_var> = buffer[num_iters-1];
3590 /// \endcode
///
/// \param NumIteratorsGen callback producing the iteration count; its result
/// is cast to SizeTy as an unsigned value before 1 is subtracted to form the
/// index of the last buffer element.
3591 static void emitScanBasedDirectiveFinals(
3592 CodeGenFunction &CGF, const OMPLoopDirective &S,
3593 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3594 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3595 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3596 SmallVector<const Expr *, 4> Shareds;
3597 SmallVector<const Expr *, 4> LHSs;
3598 SmallVector<const Expr *, 4> RHSs;
3599 SmallVector<const Expr *, 4> Privates;
3600 SmallVector<const Expr *, 4> CopyOps;
3601 SmallVector<const Expr *, 4> CopyArrayElems;
3602 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3603 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3604 "Only inscan reductions are expected.");
3605 Shareds.append(C->varlist_begin(), C->varlist_end());
3606 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3607 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3608 Privates.append(C->privates().begin(), C->privates().end());
3609 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3610 CopyArrayElems.append(C->copy_array_elems().begin(),
3611 C->copy_array_elems().end());
3613 // Create temp var and copy LHS value to this temp value.
3614 // LHS = TMP[LastIter];
// NOTE(review): this subtraction uses CreateNSWSub on a SizeTy value that was
// produced by an unsigned cast, whereas emitScanBasedDirective computes the
// same num_iters - 1 with CreateNUWSub — confirm the wrap flag is intended.
3615 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3616 OMPScanNumIterations,
3617 llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
3618 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3619 const Expr *PrivateExpr = Privates[I];
3620 const Expr *OrigExpr = Shareds[I];
3621 const Expr *CopyArrayElem = CopyArrayElems[I];
// Map the subscript's opaque index to the last-iteration index so that
// emitting CopyArrayElem yields the address of buffer[num_iters-1].
3622 CodeGenFunction::OpaqueValueMapping IdxMapping(
3623 CGF,
3624 cast<OpaqueValueExpr>(
3625 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3626 RValue::get(OMPLast));
3627 LValue DestLVal = CGF.EmitLValue(OrigExpr);
3628 LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
3629 CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
3630 SrcLVal.getAddress(CGF),
3631 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
3632 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
3633 CopyOps[I]);
3637 /// Emits the code for the directive with inscan reductions.
3638 /// The code is the following:
3639 /// \code
3640 /// #pragma omp ...
3641 /// for (i: 0..<num_iters>) {
3642 /// <input phase>;
3643 /// buffer[i] = red;
3644 /// }
3645 /// #pragma omp master // in parallel region
3646 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3647 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3648 /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3649 /// #pragma omp barrier // in parallel region
3650 /// #pragma omp ...
3651 /// for (0..<num_iters>) {
3652 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3653 /// <scan phase>;
3654 /// }
3655 /// \endcode
///
/// \param NumIteratorsGen callback producing the iteration count (cast to
/// SizeTy as an unsigned value).
/// \param FirstGen callback invoked while CGF.OMPFirstScanLoop is true.
/// \param SecondGen callback invoked after CGF.OMPFirstScanLoop is reset to
/// false.
3656 static void emitScanBasedDirective(
3657 CodeGenFunction &CGF, const OMPLoopDirective &S,
3658 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3659 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3660 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3661 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3662 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3663 SmallVector<const Expr *, 4> Privates;
3664 SmallVector<const Expr *, 4> ReductionOps;
3665 SmallVector<const Expr *, 4> LHSs;
3666 SmallVector<const Expr *, 4> RHSs;
3667 SmallVector<const Expr *, 4> CopyArrayElems;
3668 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3669 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3670 "Only inscan reductions are expected.");
3671 Privates.append(C->privates().begin(), C->privates().end());
3672 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3673 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3674 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3675 CopyArrayElems.append(C->copy_array_elems().begin(),
3676 C->copy_array_elems().end());
3678 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3680 // Emit loop with input phase:
3681 // #pragma omp ...
3682 // for (i: 0..<num_iters>) {
3683 // <input phase>;
3684 // buffer[i] = red;
3685 // }
3686 CGF.OMPFirstScanLoop = true;
3687 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3688 FirstGen(CGF);
3690 // #pragma omp barrier // in parallel region
3691 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3692 &ReductionOps,
3693 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3694 Action.Enter(CGF);
3695 // Emit prefix reduction:
3696 // #pragma omp master // in parallel region
3697 // for (int k = 0; k != ceil(log2(n)); ++k)
3698 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3699 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3700 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
// LogVal = (unsigned) ceil(log2((double) num_iters)), computed with the
// llvm.log2 and llvm.ceil intrinsics on DoubleTy and converted to IntTy.
3701 llvm::Function *F =
3702 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3703 llvm::Value *Arg =
3704 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3705 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3706 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3707 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3708 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3709 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3710 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3711 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
// The outer loop block is entered directly from InputBB; its exit test
// (Next != LogVal) is emitted at the bottom of the loop.
3712 CGF.EmitBlock(LoopBB);
3713 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3714 // size pow2k = 1;
3715 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3716 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3717 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3718 // for (size i = n - 1; i >= 2 ^ k; --i)
3719 // tmp[i] op= tmp[i-pow2k];
3720 llvm::BasicBlock *InnerLoopBB =
3721 CGF.createBasicBlock("omp.inner.log.scan.body");
3722 llvm::BasicBlock *InnerExitBB =
3723 CGF.createBasicBlock("omp.inner.log.scan.exit");
// The inner loop is guarded: it is skipped entirely when n - 1 < pow2k.
3724 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3725 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3726 CGF.EmitBlock(InnerLoopBB);
3727 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3728 IVal->addIncoming(NMin1, LoopBB);
3730 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3731 auto *ILHS = LHSs.begin();
3732 auto *IRHS = RHSs.begin();
3733 for (const Expr *CopyArrayElem : CopyArrayElems) {
3734 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3735 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
// The reduction's LHS variable is redirected to buffer[i] ...
3736 Address LHSAddr = Address::invalid();
3738 CodeGenFunction::OpaqueValueMapping IdxMapping(
3739 CGF,
3740 cast<OpaqueValueExpr>(
3741 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3742 RValue::get(IVal));
3743 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3745 PrivScope.addPrivate(LHSVD, LHSAddr);
// ... and its RHS variable to buffer[i - pow2k].
3746 Address RHSAddr = Address::invalid();
3748 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3749 CodeGenFunction::OpaqueValueMapping IdxMapping(
3750 CGF,
3751 cast<OpaqueValueExpr>(
3752 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3753 RValue::get(OffsetIVal));
3754 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3756 PrivScope.addPrivate(RHSVD, RHSAddr);
3757 ++ILHS;
3758 ++IRHS;
3760 PrivScope.Privatize();
3761 CGF.CGM.getOpenMPRuntime().emitReduction(
3762 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3763 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
// --i; continue the inner loop while i >= pow2k.
3765 llvm::Value *NextIVal =
3766 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3767 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3768 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3769 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3770 CGF.EmitBlock(InnerExitBB);
// ++k;
3771 llvm::Value *Next =
3772 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3773 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3774 // pow2k <<= 1;
3775 llvm::Value *NextPow2K =
3776 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3777 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3778 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3779 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3780 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3781 CGF.EmitBlock(ExitBB);
// For parallel directives the prefix reduction is emitted inside a master
// region followed by a barrier; otherwise it is emitted directly.
3783 if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3784 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3785 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3786 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3787 /*ForceSimpleCall=*/true);
3788 } else {
3789 RegionCodeGenTy RCG(CodeGen);
3790 RCG(CGF);
3793 CGF.OMPFirstScanLoop = false;
3794 SecondGen(CGF);
/// Emits the worksharing loop for \p S inside an OMPCancelStackRAII region.
///
/// If any reduction clause on \p S has the inscan modifier, the loop is
/// emitted twice through emitScanBasedDirective (FirstGen, then SecondGen);
/// for directives that are not parallel directives the scan buffers are
/// declared before and the final values copied back after.
///
/// \param HasCancel forwarded to every OMPCancelStackRAII created here.
/// \returns the result of EmitOMPWorksharingLoop (taken from the second loop
/// in the inscan case).
3797 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3798 const OMPLoopDirective &S,
3799 bool HasCancel) {
3800 bool HasLastprivates;
3801 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3802 [](const OMPReductionClause *C) {
3803 return C->getModifier() == OMPC_REDUCTION_inscan;
3804 })) {
// Emits the directive's number-of-iterations expression.
3805 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3806 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3807 OMPLoopScope LoopScope(CGF, S);
3808 return CGF.EmitScalarExpr(S.getNumIterations());
// First loop: its lastprivate result is discarded and a barrier is emitted
// after it.
3810 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3811 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3812 CGF, S.getDirectiveKind(), HasCancel);
3813 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3814 emitForLoopBounds,
3815 emitDispatchForLoopBounds);
3816 // Emit an implicit barrier at the end.
3817 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3818 OMPD_for);
// Second loop: records whether lastprivates were found.
3820 const auto &&SecondGen = [&S, HasCancel,
3821 &HasLastprivates](CodeGenFunction &CGF) {
3822 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3823 CGF, S.getDirectiveKind(), HasCancel);
3824 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3825 emitForLoopBounds,
3826 emitDispatchForLoopBounds);
3828 if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3829 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3830 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3831 if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3832 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
3833 } else {
3834 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3835 HasCancel);
3836 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3837 emitForLoopBounds,
3838 emitDispatchForLoopBounds);
3840 return HasLastprivates;
/// Decides whether an OMPForDirective can be emitted through the
/// OpenMPIRBuilder path (see the caller, EmitOMPForDirective).
///
/// Returns false when the directive has cancel, when a schedule clause carries
/// a first or second modifier, or when its schedule kind is
/// OMPC_SCHEDULE_unknown. Nowait clauses and schedule clauses with kind
/// auto/dynamic/runtime/guided/static are skipped over.
/// NOTE(review): brace-only lines are not visible in this listing; the
/// remaining `return false` appears to reject any other clause kind — confirm.
3843 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3844 if (S.hasCancel())
3845 return false;
3846 for (OMPClause *C : S.clauses()) {
3847 if (isa<OMPNowaitClause>(C))
3848 continue;
3850 if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
3851 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3852 return false;
3853 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3854 return false;
3855 switch (SC->getScheduleKind()) {
3856 case OMPC_SCHEDULE_auto:
3857 case OMPC_SCHEDULE_dynamic:
3858 case OMPC_SCHEDULE_runtime:
3859 case OMPC_SCHEDULE_guided:
3860 case OMPC_SCHEDULE_static:
3861 continue;
3862 case OMPC_SCHEDULE_unknown:
3863 return false;
3867 return false;
3870 return true;
/// Maps a clang OpenMPScheduleClauseKind onto the corresponding
/// llvm::omp::ScheduleKind enumerator. OMPC_SCHEDULE_unknown maps to
/// OMP_SCHEDULE_Default; any value not listed in the switch reaches
/// llvm_unreachable.
3873 static llvm::omp::ScheduleKind
3874 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
3875 switch (ScheduleClauseKind) {
3876 case OMPC_SCHEDULE_unknown:
3877 return llvm::omp::OMP_SCHEDULE_Default;
3878 case OMPC_SCHEDULE_auto:
3879 return llvm::omp::OMP_SCHEDULE_Auto;
3880 case OMPC_SCHEDULE_dynamic:
3881 return llvm::omp::OMP_SCHEDULE_Dynamic;
3882 case OMPC_SCHEDULE_guided:
3883 return llvm::omp::OMP_SCHEDULE_Guided;
3884 case OMPC_SCHEDULE_runtime:
3885 return llvm::omp::OMP_SCHEDULE_Runtime;
3886 case OMPC_SCHEDULE_static:
3887 return llvm::omp::OMP_SCHEDULE_Static;
3889 llvm_unreachable("Unhandled schedule kind");
/// Emits code for an OMPForDirective.
///
/// When the OpenMPIRBuilder language option is on and
/// isSupportedByOpenMPIRBuilder(S) holds, the loop is built as a canonical loop
/// nest and handed to OpenMPIRBuilder::applyWorkshareLoop. Otherwise the loop
/// is emitted through emitWorksharingDirective, followed by a barrier unless a
/// nowait clause is present and no lastprivates were found.
3892 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3893 bool HasLastprivates = false;
3894 bool UseOMPIRBuilder =
3895 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3896 auto &&CodeGen = [this, &S, &HasLastprivates,
3897 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3898 // Use the OpenMPIRBuilder if enabled.
3899 if (UseOMPIRBuilder) {
// NOTE(review): this branch emits through the captured `this`
// (EmitScalarExpr, Builder, AllocaInsertPt) rather than through the CGF
// parameter — presumably they are the same object for an inlined directive;
// confirm.
3900 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3902 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
3903 llvm::Value *ChunkSize = nullptr;
3904 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
3905 SchedKind =
3906 convertClauseKindToSchedKind(SchedClause->getScheduleKind());
3907 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
3908 ChunkSize = EmitScalarExpr(ChunkSizeExpr);
3911 // Emit the associated statement and get its loop representation.
3912 const Stmt *Inner = S.getRawStmt();
3913 llvm::CanonicalLoopInfo *CLI =
3914 EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3916 llvm::OpenMPIRBuilder &OMPBuilder =
3917 CGM.getOpenMPRuntime().getOMPBuilder();
3918 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3919 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
// The simd/monotonic/nonmonotonic/ordered flags are all passed as false.
3920 OMPBuilder.applyWorkshareLoop(
3921 Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
3922 SchedKind, ChunkSize, /*HasSimdModifier=*/false,
3923 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
3924 /*HasOrderedClause=*/false);
3925 return;
3928 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3931 auto LPCRegion =
3932 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3933 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3934 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3935 S.hasCancel());
// On the IRBuilder path NeedsBarrier was already handed to
// applyWorkshareLoop, so no barrier is emitted here in that case.
3938 if (!UseOMPIRBuilder) {
3939 // Emit an implicit barrier at the end.
3940 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3941 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3943 // Check for outer lastprivate conditional update.
3944 checkForLastprivateConditionalUpdate(*this, S);
/// Emits code for an OMPForSimdDirective.
/// The loop is emitted through emitWorksharingDirective with HasCancel fixed
/// to false, as an inlined OMPD_simd region. A barrier (kind OMPD_for) follows
/// unless a nowait clause is present and no lastprivates were found.
3947 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3948 bool HasLastprivates = false;
3949 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3950 PrePostActionTy &) {
3951 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3954 auto LPCRegion =
3955 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3956 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3957 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3960 // Emit an implicit barrier at the end.
3961 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3962 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3963 // Check for outer lastprivate conditional update.
3964 checkForLastprivateConditionalUpdate(*this, S);
/// Creates a memory temporary of type \p Ty named \p Name and returns it as an
/// lvalue.
/// \param Init optional initial value; when non-null it is stored into the new
/// lvalue as an initialization. When null the temporary is left unwritten.
3967 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3968 const Twine &Name,
3969 llvm::Value *Init = nullptr) {
3970 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3971 if (Init)
3972 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3973 return LVal;
3976 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3977 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3978 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3979 bool HasLastprivates = false;
3980 auto &&CodeGen = [&S, CapturedStmt, CS,
3981 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3982 const ASTContext &C = CGF.getContext();
3983 QualType KmpInt32Ty =
3984 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3985 // Emit helper vars inits.
3986 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3987 CGF.Builder.getInt32(0));
3988 llvm::ConstantInt *GlobalUBVal = CS != nullptr
3989 ? CGF.Builder.getInt32(CS->size() - 1)
3990 : CGF.Builder.getInt32(0);
3991 LValue UB =
3992 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3993 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3994 CGF.Builder.getInt32(1));
3995 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3996 CGF.Builder.getInt32(0));
3997 // Loop counter.
3998 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3999 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4000 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
4001 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4002 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
4003 // Generate condition for loop.
4004 BinaryOperator *Cond = BinaryOperator::Create(
4005 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
4006 S.getBeginLoc(), FPOptionsOverride());
4007 // Increment for loop counter.
4008 UnaryOperator *Inc = UnaryOperator::Create(
4009 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
4010 S.getBeginLoc(), true, FPOptionsOverride());
4011 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
4012 // Iterate through all sections and emit a switch construct:
4013 // switch (IV) {
4014 // case 0:
4015 // <SectionStmt[0]>;
4016 // break;
4017 // ...
4018 // case <NumSection> - 1:
4019 // <SectionStmt[<NumSection> - 1]>;
4020 // break;
4021 // }
4022 // .omp.sections.exit:
4023 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
4024 llvm::SwitchInst *SwitchStmt =
4025 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
4026 ExitBB, CS == nullptr ? 1 : CS->size());
4027 if (CS) {
4028 unsigned CaseNumber = 0;
4029 for (const Stmt *SubStmt : CS->children()) {
4030 auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
4031 CGF.EmitBlock(CaseBB);
4032 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
4033 CGF.EmitStmt(SubStmt);
4034 CGF.EmitBranch(ExitBB);
4035 ++CaseNumber;
4037 } else {
4038 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
4039 CGF.EmitBlock(CaseBB);
4040 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
4041 CGF.EmitStmt(CapturedStmt);
4042 CGF.EmitBranch(ExitBB);
4044 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
4047 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
4048 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
4049 // Emit implicit barrier to synchronize threads and avoid data races on
4050 // initialization of firstprivate variables and post-update of lastprivate
4051 // variables.
4052 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4053 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4054 /*ForceSimpleCall=*/true);
4056 CGF.EmitOMPPrivateClause(S, LoopScope);
4057 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
4058 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4059 CGF.EmitOMPReductionClauseInit(S, LoopScope);
4060 (void)LoopScope.Privatize();
4061 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4062 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4064 // Emit static non-chunked loop.
4065 OpenMPScheduleTy ScheduleKind;
4066 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
4067 CGOpenMPRuntime::StaticRTInput StaticInit(
4068 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
4069 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
4070 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
4071 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
4072 // UB = min(UB, GlobalUB);
4073 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
4074 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
4075 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
4076 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
4077 // IV = LB;
4078 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
4079 // while (idx <= UB) { BODY; ++idx; }
4080 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
4081 [](CodeGenFunction &) {});
4082 // Tell the runtime we are done.
4083 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
4084 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
4085 S.getDirectiveKind());
4087 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
4088 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4089 // Emit post-update of the reduction variables if IsLastIter != 0.
4090 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
4091 return CGF.Builder.CreateIsNotNull(
4092 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4095 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4096 if (HasLastprivates)
4097 CGF.EmitOMPLastprivateClauseFinal(
4098 S, /*NoFinals=*/false,
4099 CGF.Builder.CreateIsNotNull(
4100 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
4103 bool HasCancel = false;
4104 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
4105 HasCancel = OSD->hasCancel();
4106 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
4107 HasCancel = OPSD->hasCancel();
4108 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
4109 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
4110 HasCancel);
4111 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4112 // clause. Otherwise the barrier will be generated by the codegen for the
4113 // directive.
4114 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
4115 // Emit implicit barrier to synchronize threads and avoid data races on
4116 // initialization of firstprivate variables.
4117 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4118 OMPD_unknown);
4122 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
4123 if (CGM.getLangOpts().OpenMPIRBuilder) {
4124 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4125 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4126 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4128 auto FiniCB = [this](InsertPointTy IP) {
4129 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4132 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4133 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4134 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
4135 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4136 if (CS) {
4137 for (const Stmt *SubStmt : CS->children()) {
4138 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
4139 InsertPointTy CodeGenIP) {
4140 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4141 *this, SubStmt, AllocaIP, CodeGenIP, "section");
4143 SectionCBVector.push_back(SectionCB);
4145 } else {
4146 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
4147 InsertPointTy CodeGenIP) {
4148 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4149 *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
4151 SectionCBVector.push_back(SectionCB);
4154 // Privatization callback that performs appropriate action for
4155 // shared/private/firstprivate/lastprivate/copyin/... variables.
4157 // TODO: This defaults to shared right now.
4158 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4159 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4160 // The next line is appropriate only for variables (Val) with the
4161 // data-sharing attribute "shared".
4162 ReplVal = &Val;
4164 return CodeGenIP;
4167 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4168 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4169 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4170 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4171 Builder.restoreIP(OMPBuilder.createSections(
4172 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
4173 S.getSingleClause<OMPNowaitClause>()));
4174 return;
4177 auto LPCRegion =
4178 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4179 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4180 EmitSections(S);
4182 // Emit an implicit barrier at the end.
4183 if (!S.getSingleClause<OMPNowaitClause>()) {
4184 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4185 OMPD_sections);
4187 // Check for outer lastprivate conditional update.
4188 checkForLastprivateConditionalUpdate(*this, S);
4191 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4192 if (CGM.getLangOpts().OpenMPIRBuilder) {
4193 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4194 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4196 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4197 auto FiniCB = [this](InsertPointTy IP) {
4198 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4201 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4202 InsertPointTy CodeGenIP) {
4203 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4204 *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
4207 LexicalScope Scope(*this, S.getSourceRange());
4208 EmitStopPoint(&S);
4209 Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
4211 return;
4213 LexicalScope Scope(*this, S.getSourceRange());
4214 EmitStopPoint(&S);
4215 EmitStmt(S.getAssociatedStmt());
4218 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4219 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4220 llvm::SmallVector<const Expr *, 8> DestExprs;
4221 llvm::SmallVector<const Expr *, 8> SrcExprs;
4222 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4223 // Check if there are any 'copyprivate' clauses associated with this
4224 // 'single' construct.
4225 // Build a list of copyprivate variables along with helper expressions
4226 // (<source>, <destination>, <destination>=<source> expressions)
4227 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4228 CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
4229 DestExprs.append(C->destination_exprs().begin(),
4230 C->destination_exprs().end());
4231 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4232 AssignmentOps.append(C->assignment_ops().begin(),
4233 C->assignment_ops().end());
4235 // Emit code for 'single' region along with 'copyprivate' clauses
4236 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4237 Action.Enter(CGF);
4238 OMPPrivateScope SingleScope(CGF);
4239 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4240 CGF.EmitOMPPrivateClause(S, SingleScope);
4241 (void)SingleScope.Privatize();
4242 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4245 auto LPCRegion =
4246 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4247 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4248 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4249 CopyprivateVars, DestExprs,
4250 SrcExprs, AssignmentOps);
4252 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4253 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4254 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4255 CGM.getOpenMPRuntime().emitBarrierCall(
4256 *this, S.getBeginLoc(),
4257 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4259 // Check for outer lastprivate conditional update.
4260 checkForLastprivateConditionalUpdate(*this, S);
4263 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4264 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4265 Action.Enter(CGF);
4266 CGF.EmitStmt(S.getRawStmt());
4268 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4271 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4272 if (CGM.getLangOpts().OpenMPIRBuilder) {
4273 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4274 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4276 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4278 auto FiniCB = [this](InsertPointTy IP) {
4279 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4282 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4283 InsertPointTy CodeGenIP) {
4284 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4285 *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
4288 LexicalScope Scope(*this, S.getSourceRange());
4289 EmitStopPoint(&S);
4290 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4292 return;
4294 LexicalScope Scope(*this, S.getSourceRange());
4295 EmitStopPoint(&S);
4296 emitMaster(*this, S);
4299 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4300 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4301 Action.Enter(CGF);
4302 CGF.EmitStmt(S.getRawStmt());
4304 Expr *Filter = nullptr;
4305 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4306 Filter = FilterClause->getThreadID();
4307 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4308 Filter);
4311 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4312 if (CGM.getLangOpts().OpenMPIRBuilder) {
4313 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4314 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4316 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4317 const Expr *Filter = nullptr;
4318 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4319 Filter = FilterClause->getThreadID();
4320 llvm::Value *FilterVal = Filter
4321 ? EmitScalarExpr(Filter, CGM.Int32Ty)
4322 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4324 auto FiniCB = [this](InsertPointTy IP) {
4325 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4328 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4329 InsertPointTy CodeGenIP) {
4330 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4331 *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
4334 LexicalScope Scope(*this, S.getSourceRange());
4335 EmitStopPoint(&S);
4336 Builder.restoreIP(
4337 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4339 return;
4341 LexicalScope Scope(*this, S.getSourceRange());
4342 EmitStopPoint(&S);
4343 emitMasked(*this, S);
4346 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4347 if (CGM.getLangOpts().OpenMPIRBuilder) {
4348 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4349 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4351 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4352 const Expr *Hint = nullptr;
4353 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4354 Hint = HintClause->getHint();
4356 // TODO: This is slightly different from what's currently being done in
4357 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4358 // about typing is final.
4359 llvm::Value *HintInst = nullptr;
4360 if (Hint)
4361 HintInst =
4362 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4364 auto FiniCB = [this](InsertPointTy IP) {
4365 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4368 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4369 InsertPointTy CodeGenIP) {
4370 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4371 *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
4374 LexicalScope Scope(*this, S.getSourceRange());
4375 EmitStopPoint(&S);
4376 Builder.restoreIP(OMPBuilder.createCritical(
4377 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4378 HintInst));
4380 return;
4383 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4384 Action.Enter(CGF);
4385 CGF.EmitStmt(S.getAssociatedStmt());
4387 const Expr *Hint = nullptr;
4388 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4389 Hint = HintClause->getHint();
4390 LexicalScope Scope(*this, S.getSourceRange());
4391 EmitStopPoint(&S);
4392 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4393 S.getDirectiveName().getAsString(),
4394 CodeGen, S.getBeginLoc(), Hint);
4397 void CodeGenFunction::EmitOMPParallelForDirective(
4398 const OMPParallelForDirective &S) {
4399 // Emit directive as a combined directive that consists of two implicit
4400 // directives: 'parallel' with 'for' directive.
4401 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4402 Action.Enter(CGF);
4403 emitOMPCopyinClause(CGF, S);
4404 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4407 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4408 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4409 CGCapturedStmtInfo CGSI(CR_OpenMP);
4410 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4411 OMPLoopScope LoopScope(CGF, S);
4412 return CGF.EmitScalarExpr(S.getNumIterations());
4414 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4415 [](const OMPReductionClause *C) {
4416 return C->getModifier() == OMPC_REDUCTION_inscan;
4418 if (IsInscan)
4419 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4420 auto LPCRegion =
4421 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4422 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4423 emitEmptyBoundParameters);
4424 if (IsInscan)
4425 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4427 // Check for outer lastprivate conditional update.
4428 checkForLastprivateConditionalUpdate(*this, S);
4431 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4432 const OMPParallelForSimdDirective &S) {
4433 // Emit directive as a combined directive that consists of two implicit
4434 // directives: 'parallel' with 'for' directive.
4435 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4436 Action.Enter(CGF);
4437 emitOMPCopyinClause(CGF, S);
4438 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4441 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4442 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4443 CGCapturedStmtInfo CGSI(CR_OpenMP);
4444 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4445 OMPLoopScope LoopScope(CGF, S);
4446 return CGF.EmitScalarExpr(S.getNumIterations());
4448 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4449 [](const OMPReductionClause *C) {
4450 return C->getModifier() == OMPC_REDUCTION_inscan;
4452 if (IsInscan)
4453 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4454 auto LPCRegion =
4455 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4456 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4457 emitEmptyBoundParameters);
4458 if (IsInscan)
4459 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4461 // Check for outer lastprivate conditional update.
4462 checkForLastprivateConditionalUpdate(*this, S);
4465 void CodeGenFunction::EmitOMPParallelMasterDirective(
4466 const OMPParallelMasterDirective &S) {
4467 // Emit directive as a combined directive that consists of two implicit
4468 // directives: 'parallel' with 'master' directive.
4469 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4470 Action.Enter(CGF);
4471 OMPPrivateScope PrivateScope(CGF);
4472 emitOMPCopyinClause(CGF, S);
4473 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4474 CGF.EmitOMPPrivateClause(S, PrivateScope);
4475 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4476 (void)PrivateScope.Privatize();
4477 emitMaster(CGF, S);
4478 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4481 auto LPCRegion =
4482 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4483 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4484 emitEmptyBoundParameters);
4485 emitPostUpdateForReductionClause(*this, S,
4486 [](CodeGenFunction &) { return nullptr; });
4488 // Check for outer lastprivate conditional update.
4489 checkForLastprivateConditionalUpdate(*this, S);
4492 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4493 const OMPParallelSectionsDirective &S) {
4494 // Emit directive as a combined directive that consists of two implicit
4495 // directives: 'parallel' with 'sections' directive.
4496 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4497 Action.Enter(CGF);
4498 emitOMPCopyinClause(CGF, S);
4499 CGF.EmitSections(S);
4502 auto LPCRegion =
4503 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4504 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4505 emitEmptyBoundParameters);
4507 // Check for outer lastprivate conditional update.
4508 checkForLastprivateConditionalUpdate(*this, S);
4511 namespace {
4512 /// Get the list of variables declared in the context of the untied tasks.
4513 class CheckVarsEscapingUntiedTaskDeclContext final
4514 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4515 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4517 public:
4518 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4519 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4520 void VisitDeclStmt(const DeclStmt *S) {
4521 if (!S)
4522 return;
4523 // Need to privatize only local vars, static locals can be processed as is.
4524 for (const Decl *D : S->decls()) {
4525 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4526 if (VD->hasLocalStorage())
4527 PrivateDecls.push_back(VD);
4530 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4531 void VisitCapturedStmt(const CapturedStmt *) {}
4532 void VisitLambdaExpr(const LambdaExpr *) {}
4533 void VisitBlockExpr(const BlockExpr *) {}
4534 void VisitStmt(const Stmt *S) {
4535 if (!S)
4536 return;
4537 for (const Stmt *Child : S->children())
4538 if (Child)
4539 Visit(Child);
4542 /// Swaps list of vars with the provided one.
4543 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4545 } // anonymous namespace
4547 static void buildDependences(const OMPExecutableDirective &S,
4548 OMPTaskDataTy &Data) {
4550 // First look for 'omp_all_memory' and add this first.
4551 bool OmpAllMemory = false;
4552 if (llvm::any_of(
4553 S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4554 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4555 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4556 })) {
4557 OmpAllMemory = true;
4558 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4559 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4560 // simplify.
4561 OMPTaskDataTy::DependData &DD =
4562 Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4563 /*IteratorExpr=*/nullptr);
4564 // Add a nullptr Expr to simplify the codegen in emitDependData.
4565 DD.DepExprs.push_back(nullptr);
4567 // Add remaining dependences skipping any 'out' or 'inout' if they are
4568 // overridden by 'omp_all_memory'.
4569 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4570 OpenMPDependClauseKind Kind = C->getDependencyKind();
4571 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4572 continue;
4573 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4574 continue;
4575 OMPTaskDataTy::DependData &DD =
4576 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4577 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4581 void CodeGenFunction::EmitOMPTaskBasedDirective(
4582 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4583 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4584 OMPTaskDataTy &Data) {
4585 // Emit outlined function for task construct.
4586 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4587 auto I = CS->getCapturedDecl()->param_begin();
4588 auto PartId = std::next(I);
4589 auto TaskT = std::next(I, 4);
4590 // Check if the task is final
4591 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4592 // If the condition constant folds and can be elided, try to avoid emitting
4593 // the condition and the dead arm of the if/else.
4594 const Expr *Cond = Clause->getCondition();
4595 bool CondConstant;
4596 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4597 Data.Final.setInt(CondConstant);
4598 else
4599 Data.Final.setPointer(EvaluateExprAsBool(Cond));
4600 } else {
4601 // By default the task is not final.
4602 Data.Final.setInt(/*IntVal=*/false);
4604 // Check if the task has 'priority' clause.
4605 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4606 const Expr *Prio = Clause->getPriority();
4607 Data.Priority.setInt(/*IntVal=*/true);
4608 Data.Priority.setPointer(EmitScalarConversion(
4609 EmitScalarExpr(Prio), Prio->getType(),
4610 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4611 Prio->getExprLoc()));
4613 // The first function argument for tasks is a thread id, the second one is a
4614 // part id (0 for tied tasks, >=0 for untied task).
4615 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4616 // Get list of private variables.
4617 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4618 auto IRef = C->varlist_begin();
4619 for (const Expr *IInit : C->private_copies()) {
4620 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4621 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4622 Data.PrivateVars.push_back(*IRef);
4623 Data.PrivateCopies.push_back(IInit);
4625 ++IRef;
4628 EmittedAsPrivate.clear();
4629 // Get list of firstprivate variables.
4630 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4631 auto IRef = C->varlist_begin();
4632 auto IElemInitRef = C->inits().begin();
4633 for (const Expr *IInit : C->private_copies()) {
4634 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4635 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4636 Data.FirstprivateVars.push_back(*IRef);
4637 Data.FirstprivateCopies.push_back(IInit);
4638 Data.FirstprivateInits.push_back(*IElemInitRef);
4640 ++IRef;
4641 ++IElemInitRef;
4644 // Get list of lastprivate variables (for taskloops).
4645 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4646 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4647 auto IRef = C->varlist_begin();
4648 auto ID = C->destination_exprs().begin();
4649 for (const Expr *IInit : C->private_copies()) {
4650 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4651 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4652 Data.LastprivateVars.push_back(*IRef);
4653 Data.LastprivateCopies.push_back(IInit);
4655 LastprivateDstsOrigs.insert(
4656 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4657 cast<DeclRefExpr>(*IRef)));
4658 ++IRef;
4659 ++ID;
4662 SmallVector<const Expr *, 4> LHSs;
4663 SmallVector<const Expr *, 4> RHSs;
4664 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4665 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4666 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4667 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4668 Data.ReductionOps.append(C->reduction_ops().begin(),
4669 C->reduction_ops().end());
4670 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4671 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4673 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4674 *this, S.getBeginLoc(), LHSs, RHSs, Data);
4675 // Build list of dependences.
4676 buildDependences(S, Data);
4677 // Get list of local vars for untied tasks.
4678 if (!Data.Tied) {
4679 CheckVarsEscapingUntiedTaskDeclContext Checker;
4680 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4681 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4682 Checker.getPrivateDecls().end());
4684 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4685 CapturedRegion](CodeGenFunction &CGF,
4686 PrePostActionTy &Action) {
4687 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4688 std::pair<Address, Address>>
4689 UntiedLocalVars;
4690 // Set proper addresses for generated private copies.
4691 OMPPrivateScope Scope(CGF);
4692 // Generate debug info for variables present in shared clause.
4693 if (auto *DI = CGF.getDebugInfo()) {
4694 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
4695 CGF.CapturedStmtInfo->getCaptureFields();
4696 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
4697 if (CaptureFields.size() && ContextValue) {
4698 unsigned CharWidth = CGF.getContext().getCharWidth();
4699 // The shared variables are packed together as members of structure.
4700 // So the address of each shared variable can be computed by adding
4701 // offset of it (within record) to the base address of record. For each
4702 // shared variable, debug intrinsic llvm.dbg.declare is generated with
4703 // appropriate expressions (DIExpression).
4704 // Ex:
4705 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4706 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4707 // metadata !svar1,
4708 // metadata !DIExpression(DW_OP_deref))
4709 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4710 // metadata !svar2,
4711 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4712 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
4713 const VarDecl *SharedVar = It->first;
4714 RecordDecl *CaptureRecord = It->second->getParent();
4715 const ASTRecordLayout &Layout =
4716 CGF.getContext().getASTRecordLayout(CaptureRecord);
4717 unsigned Offset =
4718 Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
4719 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4720 (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
4721 CGF.Builder, false);
4722 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
4723 // Get the call dbg.declare instruction we just created and update
4724 // its DIExpression to add offset to base address.
4725 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) {
4726 SmallVector<uint64_t, 8> Ops;
4727 // Add offset to the base address if non zero.
4728 if (Offset) {
4729 Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
4730 Ops.push_back(Offset);
4732 Ops.push_back(llvm::dwarf::DW_OP_deref);
4733 auto &Ctx = DDI->getContext();
4734 llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops);
4735 Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr));
4740 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4741 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
4742 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
4743 enum { PrivatesParam = 2, CopyFnParam = 3 };
4744 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4745 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4746 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4747 CS->getCapturedDecl()->getParam(PrivatesParam)));
4748 // Map privates.
4749 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4750 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4751 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4752 CallArgs.push_back(PrivatesPtr);
4753 ParamTypes.push_back(PrivatesPtr->getType());
4754 for (const Expr *E : Data.PrivateVars) {
4755 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4756 Address PrivatePtr = CGF.CreateMemTemp(
4757 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4758 PrivatePtrs.emplace_back(VD, PrivatePtr);
4759 CallArgs.push_back(PrivatePtr.getPointer());
4760 ParamTypes.push_back(PrivatePtr.getType());
4762 for (const Expr *E : Data.FirstprivateVars) {
4763 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4764 Address PrivatePtr =
4765 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4766 ".firstpriv.ptr.addr");
4767 PrivatePtrs.emplace_back(VD, PrivatePtr);
4768 FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4769 CallArgs.push_back(PrivatePtr.getPointer());
4770 ParamTypes.push_back(PrivatePtr.getType());
4772 for (const Expr *E : Data.LastprivateVars) {
4773 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4774 Address PrivatePtr =
4775 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4776 ".lastpriv.ptr.addr");
4777 PrivatePtrs.emplace_back(VD, PrivatePtr);
4778 CallArgs.push_back(PrivatePtr.getPointer());
4779 ParamTypes.push_back(PrivatePtr.getType());
4781 for (const VarDecl *VD : Data.PrivateLocals) {
4782 QualType Ty = VD->getType().getNonReferenceType();
4783 if (VD->getType()->isLValueReferenceType())
4784 Ty = CGF.getContext().getPointerType(Ty);
4785 if (isAllocatableDecl(VD))
4786 Ty = CGF.getContext().getPointerType(Ty);
4787 Address PrivatePtr = CGF.CreateMemTemp(
4788 CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4789 auto Result = UntiedLocalVars.insert(
4790 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4791 // If key exists update in place.
4792 if (Result.second == false)
4793 *Result.first = std::make_pair(
4794 VD, std::make_pair(PrivatePtr, Address::invalid()));
4795 CallArgs.push_back(PrivatePtr.getPointer());
4796 ParamTypes.push_back(PrivatePtr.getType());
4798 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4799 ParamTypes, /*isVarArg=*/false);
4800 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4801 CopyFn, CopyFnTy->getPointerTo());
4802 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4803 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4804 for (const auto &Pair : LastprivateDstsOrigs) {
4805 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4806 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4807 /*RefersToEnclosingVariableOrCapture=*/
4808 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4809 Pair.second->getType(), VK_LValue,
4810 Pair.second->getExprLoc());
4811 Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF));
4813 for (const auto &Pair : PrivatePtrs) {
4814 Address Replacement = Address(
4815 CGF.Builder.CreateLoad(Pair.second),
4816 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4817 CGF.getContext().getDeclAlign(Pair.first));
4818 Scope.addPrivate(Pair.first, Replacement);
4819 if (auto *DI = CGF.getDebugInfo())
4820 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4821 (void)DI->EmitDeclareOfAutoVariable(
4822 Pair.first, Pair.second.getPointer(), CGF.Builder,
4823 /*UsePointerValue*/ true);
4825 // Adjust mapping for internal locals by mapping actual memory instead of
4826 // a pointer to this memory.
4827 for (auto &Pair : UntiedLocalVars) {
4828 QualType VDType = Pair.first->getType().getNonReferenceType();
4829 if (isAllocatableDecl(Pair.first)) {
4830 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4831 Address Replacement(
4832 Ptr,
4833 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
4834 CGF.getPointerAlign());
4835 Pair.second.first = Replacement;
4836 Ptr = CGF.Builder.CreateLoad(Replacement);
4837 Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
4838 CGF.getContext().getDeclAlign(Pair.first));
4839 Pair.second.second = Replacement;
4840 } else {
4841 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4842 Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
4843 CGF.getContext().getDeclAlign(Pair.first));
4844 Pair.second.first = Replacement;
4848 if (Data.Reductions) {
4849 OMPPrivateScope FirstprivateScope(CGF);
4850 for (const auto &Pair : FirstprivatePtrs) {
4851 Address Replacement(
4852 CGF.Builder.CreateLoad(Pair.second),
4853 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4854 CGF.getContext().getDeclAlign(Pair.first));
4855 FirstprivateScope.addPrivate(Pair.first, Replacement);
4857 (void)FirstprivateScope.Privatize();
4858 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4859 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4860 Data.ReductionCopies, Data.ReductionOps);
4861 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4862 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4863 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4864 RedCG.emitSharedOrigLValue(CGF, Cnt);
4865 RedCG.emitAggregateType(CGF, Cnt);
4866 // FIXME: This must removed once the runtime library is fixed.
4867 // Emit required threadprivate variables for
4868 // initializer/combiner/finalizer.
4869 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4870 RedCG, Cnt);
4871 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4872 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4873 Replacement =
4874 Address(CGF.EmitScalarConversion(
4875 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4876 CGF.getContext().getPointerType(
4877 Data.ReductionCopies[Cnt]->getType()),
4878 Data.ReductionCopies[Cnt]->getExprLoc()),
4879 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
4880 Replacement.getAlignment());
4881 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4882 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4885 // Privatize all private variables except for in_reduction items.
4886 (void)Scope.Privatize();
4887 SmallVector<const Expr *, 4> InRedVars;
4888 SmallVector<const Expr *, 4> InRedPrivs;
4889 SmallVector<const Expr *, 4> InRedOps;
4890 SmallVector<const Expr *, 4> TaskgroupDescriptors;
4891 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4892 auto IPriv = C->privates().begin();
4893 auto IRed = C->reduction_ops().begin();
4894 auto ITD = C->taskgroup_descriptors().begin();
4895 for (const Expr *Ref : C->varlists()) {
4896 InRedVars.emplace_back(Ref);
4897 InRedPrivs.emplace_back(*IPriv);
4898 InRedOps.emplace_back(*IRed);
4899 TaskgroupDescriptors.emplace_back(*ITD);
4900 std::advance(IPriv, 1);
4901 std::advance(IRed, 1);
4902 std::advance(ITD, 1);
4905 // Privatize in_reduction items here, because taskgroup descriptors must be
4906 // privatized earlier.
4907 OMPPrivateScope InRedScope(CGF);
4908 if (!InRedVars.empty()) {
4909 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4910 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4911 RedCG.emitSharedOrigLValue(CGF, Cnt);
4912 RedCG.emitAggregateType(CGF, Cnt);
4913 // The taskgroup descriptor variable is always implicit firstprivate and
4914 // privatized already during processing of the firstprivates.
4915 // FIXME: This must removed once the runtime library is fixed.
4916 // Emit required threadprivate variables for
4917 // initializer/combiner/finalizer.
4918 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4919 RedCG, Cnt);
4920 llvm::Value *ReductionsPtr;
4921 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4922 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4923 TRExpr->getExprLoc());
4924 } else {
4925 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4927 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4928 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4929 Replacement = Address(
4930 CGF.EmitScalarConversion(
4931 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4932 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4933 InRedPrivs[Cnt]->getExprLoc()),
4934 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
4935 Replacement.getAlignment());
4936 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4937 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4940 (void)InRedScope.Privatize();
4942 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4943 UntiedLocalVars);
4944 Action.Enter(CGF);
4945 BodyGen(CGF);
4947 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4948 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4949 Data.NumberOfParts);
4950 OMPLexicalScope Scope(*this, S, std::nullopt,
4951 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4952 !isOpenMPSimdDirective(S.getDirectiveKind()));
4953 TaskGen(*this, OutlinedFn, Data);
4956 static ImplicitParamDecl *
4957 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4958 QualType Ty, CapturedDecl *CD,
4959 SourceLocation Loc) {
4960 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4961 ImplicitParamDecl::Other);
4962 auto *OrigRef = DeclRefExpr::Create(
4963 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4964 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4965 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4966 ImplicitParamDecl::Other);
4967 auto *PrivateRef = DeclRefExpr::Create(
4968 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4969 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4970 QualType ElemType = C.getBaseElementType(Ty);
4971 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4972 ImplicitParamDecl::Other);
4973 auto *InitRef = DeclRefExpr::Create(
4974 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4975 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4976 PrivateVD->setInitStyle(VarDecl::CInit);
4977 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4978 InitRef, /*BasePath=*/nullptr,
4979 VK_PRValue, FPOptionsOverride()));
4980 Data.FirstprivateVars.emplace_back(OrigRef);
4981 Data.FirstprivateCopies.emplace_back(PrivateRef);
4982 Data.FirstprivateInits.emplace_back(InitRef);
4983 return OrigVD;
4986 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4987 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4988 OMPTargetDataInfo &InputInfo) {
4989 // Emit outlined function for task construct.
4990 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4991 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4992 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4993 auto I = CS->getCapturedDecl()->param_begin();
4994 auto PartId = std::next(I);
4995 auto TaskT = std::next(I, 4);
4996 OMPTaskDataTy Data;
4997 // The task is not final.
4998 Data.Final.setInt(/*IntVal=*/false);
4999 // Get list of firstprivate variables.
5000 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5001 auto IRef = C->varlist_begin();
5002 auto IElemInitRef = C->inits().begin();
5003 for (auto *IInit : C->private_copies()) {
5004 Data.FirstprivateVars.push_back(*IRef);
5005 Data.FirstprivateCopies.push_back(IInit);
5006 Data.FirstprivateInits.push_back(*IElemInitRef);
5007 ++IRef;
5008 ++IElemInitRef;
5011 SmallVector<const Expr *, 4> LHSs;
5012 SmallVector<const Expr *, 4> RHSs;
5013 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5014 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5015 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5016 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5017 Data.ReductionOps.append(C->reduction_ops().begin(),
5018 C->reduction_ops().end());
5019 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5020 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5022 OMPPrivateScope TargetScope(*this);
5023 VarDecl *BPVD = nullptr;
5024 VarDecl *PVD = nullptr;
5025 VarDecl *SVD = nullptr;
5026 VarDecl *MVD = nullptr;
5027 if (InputInfo.NumberOfTargetItems > 0) {
5028 auto *CD = CapturedDecl::Create(
5029 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5030 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5031 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5032 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
5033 /*IndexTypeQuals=*/0);
5034 BPVD = createImplicitFirstprivateForType(
5035 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5036 PVD = createImplicitFirstprivateForType(
5037 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5038 QualType SizesType = getContext().getConstantArrayType(
5039 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5040 ArrSize, nullptr, ArrayType::Normal,
5041 /*IndexTypeQuals=*/0);
5042 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
5043 S.getBeginLoc());
5044 TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
5045 TargetScope.addPrivate(PVD, InputInfo.PointersArray);
5046 TargetScope.addPrivate(SVD, InputInfo.SizesArray);
5047 // If there is no user-defined mapper, the mapper array will be nullptr. In
5048 // this case, we don't need to privatize it.
5049 if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5050 InputInfo.MappersArray.getPointer())) {
5051 MVD = createImplicitFirstprivateForType(
5052 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5053 TargetScope.addPrivate(MVD, InputInfo.MappersArray);
5056 (void)TargetScope.Privatize();
5057 buildDependences(S, Data);
5058 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
5059 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5060 // Set proper addresses for generated private copies.
5061 OMPPrivateScope Scope(CGF);
5062 if (!Data.FirstprivateVars.empty()) {
5063 enum { PrivatesParam = 2, CopyFnParam = 3 };
5064 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5065 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
5066 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
5067 CS->getCapturedDecl()->getParam(PrivatesParam)));
5068 // Map privates.
5069 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5070 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5071 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5072 CallArgs.push_back(PrivatesPtr);
5073 ParamTypes.push_back(PrivatesPtr->getType());
5074 for (const Expr *E : Data.FirstprivateVars) {
5075 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5076 Address PrivatePtr =
5077 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
5078 ".firstpriv.ptr.addr");
5079 PrivatePtrs.emplace_back(VD, PrivatePtr);
5080 CallArgs.push_back(PrivatePtr.getPointer());
5081 ParamTypes.push_back(PrivatePtr.getType());
5083 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
5084 ParamTypes, /*isVarArg=*/false);
5085 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5086 CopyFn, CopyFnTy->getPointerTo());
5087 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5088 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
5089 for (const auto &Pair : PrivatePtrs) {
5090 Address Replacement(
5091 CGF.Builder.CreateLoad(Pair.second),
5092 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5093 CGF.getContext().getDeclAlign(Pair.first));
5094 Scope.addPrivate(Pair.first, Replacement);
5097 CGF.processInReduction(S, Data, CGF, CS, Scope);
5098 if (InputInfo.NumberOfTargetItems > 0) {
5099 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5100 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
5101 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5102 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
5103 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5104 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
5105 // If MVD is nullptr, the mapper array is not privatized
5106 if (MVD)
5107 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5108 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
5111 Action.Enter(CGF);
5112 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5113 BodyGen(CGF);
5115 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5116 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
5117 Data.NumberOfParts);
5118 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5119 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5120 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5121 SourceLocation());
5122 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5123 SharedsTy, CapturedStruct, &IfCond, Data);
5126 void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5127 OMPTaskDataTy &Data,
5128 CodeGenFunction &CGF,
5129 const CapturedStmt *CS,
5130 OMPPrivateScope &Scope) {
5131 if (Data.Reductions) {
5132 OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
5133 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5134 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5135 Data.ReductionCopies, Data.ReductionOps);
5136 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5137 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
5138 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5139 RedCG.emitSharedOrigLValue(CGF, Cnt);
5140 RedCG.emitAggregateType(CGF, Cnt);
5141 // FIXME: This must removed once the runtime library is fixed.
5142 // Emit required threadprivate variables for
5143 // initializer/combiner/finalizer.
5144 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5145 RedCG, Cnt);
5146 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5147 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5148 Replacement =
5149 Address(CGF.EmitScalarConversion(
5150 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
5151 CGF.getContext().getPointerType(
5152 Data.ReductionCopies[Cnt]->getType()),
5153 Data.ReductionCopies[Cnt]->getExprLoc()),
5154 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5155 Replacement.getAlignment());
5156 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5157 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5160 (void)Scope.Privatize();
5161 SmallVector<const Expr *, 4> InRedVars;
5162 SmallVector<const Expr *, 4> InRedPrivs;
5163 SmallVector<const Expr *, 4> InRedOps;
5164 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5165 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5166 auto IPriv = C->privates().begin();
5167 auto IRed = C->reduction_ops().begin();
5168 auto ITD = C->taskgroup_descriptors().begin();
5169 for (const Expr *Ref : C->varlists()) {
5170 InRedVars.emplace_back(Ref);
5171 InRedPrivs.emplace_back(*IPriv);
5172 InRedOps.emplace_back(*IRed);
5173 TaskgroupDescriptors.emplace_back(*ITD);
5174 std::advance(IPriv, 1);
5175 std::advance(IRed, 1);
5176 std::advance(ITD, 1);
5179 OMPPrivateScope InRedScope(CGF);
5180 if (!InRedVars.empty()) {
5181 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5182 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5183 RedCG.emitSharedOrigLValue(CGF, Cnt);
5184 RedCG.emitAggregateType(CGF, Cnt);
5185 // FIXME: This must removed once the runtime library is fixed.
5186 // Emit required threadprivate variables for
5187 // initializer/combiner/finalizer.
5188 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5189 RedCG, Cnt);
5190 llvm::Value *ReductionsPtr;
5191 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5192 ReductionsPtr =
5193 CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
5194 } else {
5195 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5197 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5198 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5199 Replacement = Address(
5200 CGF.EmitScalarConversion(
5201 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
5202 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
5203 InRedPrivs[Cnt]->getExprLoc()),
5204 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
5205 Replacement.getAlignment());
5206 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5207 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5210 (void)InRedScope.Privatize();
5213 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5214 // Emit outlined function for task construct.
5215 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5216 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5217 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5218 const Expr *IfCond = nullptr;
5219 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5220 if (C->getNameModifier() == OMPD_unknown ||
5221 C->getNameModifier() == OMPD_task) {
5222 IfCond = C->getCondition();
5223 break;
5227 OMPTaskDataTy Data;
5228 // Check if we should emit tied or untied task.
5229 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5230 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5231 CGF.EmitStmt(CS->getCapturedStmt());
5233 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5234 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5235 const OMPTaskDataTy &Data) {
5236 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
5237 SharedsTy, CapturedStruct, IfCond,
5238 Data);
5240 auto LPCRegion =
5241 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
5242 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
5245 void CodeGenFunction::EmitOMPTaskyieldDirective(
5246 const OMPTaskyieldDirective &S) {
5247 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
5250 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5251 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5252 Expr *ME = MC ? MC->getMessageString() : nullptr;
5253 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5254 bool IsFatal = false;
5255 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5256 IsFatal = true;
5257 CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
5260 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5261 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5264 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5265 OMPTaskDataTy Data;
5266 // Build list of dependences
5267 buildDependences(S, Data);
5268 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5269 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5272 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5273 return T.clauses().empty();
5276 void CodeGenFunction::EmitOMPTaskgroupDirective(
5277 const OMPTaskgroupDirective &S) {
5278 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5279 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
5280 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5281 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5282 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5283 AllocaInsertPt->getIterator());
5285 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5286 InsertPointTy CodeGenIP) {
5287 Builder.restoreIP(CodeGenIP);
5288 EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5290 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5291 if (!CapturedStmtInfo)
5292 CapturedStmtInfo = &CapStmtInfo;
5293 Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
5294 return;
5296 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5297 Action.Enter(CGF);
5298 if (const Expr *E = S.getReductionRef()) {
5299 SmallVector<const Expr *, 4> LHSs;
5300 SmallVector<const Expr *, 4> RHSs;
5301 OMPTaskDataTy Data;
5302 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5303 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5304 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5305 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5306 Data.ReductionOps.append(C->reduction_ops().begin(),
5307 C->reduction_ops().end());
5308 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5309 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5311 llvm::Value *ReductionDesc =
5312 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
5313 LHSs, RHSs, Data);
5314 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5315 CGF.EmitVarDecl(*VD);
5316 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
5317 /*Volatile=*/false, E->getType());
5319 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5321 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
5324 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5325 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5326 ? llvm::AtomicOrdering::NotAtomic
5327 : llvm::AtomicOrdering::AcquireRelease;
5328 CGM.getOpenMPRuntime().emitFlush(
5329 *this,
5330 [&S]() -> ArrayRef<const Expr *> {
5331 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5332 return llvm::ArrayRef(FlushClause->varlist_begin(),
5333 FlushClause->varlist_end());
5334 return std::nullopt;
5335 }(),
5336 S.getBeginLoc(), AO);
5339 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5340 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5341 LValue DOLVal = EmitLValue(DO->getDepobj());
5342 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5343 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5344 DC->getModifier());
5345 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5346 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5347 *this, Dependencies, DC->getBeginLoc());
5348 EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
5349 return;
5351 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5352 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
5353 return;
5355 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5356 CGM.getOpenMPRuntime().emitUpdateClause(
5357 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
5358 return;
/// Emits code for the 'scan' directive \p S.
///
/// Nothing is emitted unless a parent loop directive has been recorded in
/// OMPParentLoopDirectiveForScan. The expressions of all 'inscan' reduction
/// clauses of that parent directive drive the codegen. Two strategies exist:
/// an in-loop form for 'simd' parents (and simd-based parents in simd-only
/// mode), and a form selected by OMPFirstScanLoop that either stores the
/// reduction value into the copy array or reloads it from there.
5362 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
// Without a recorded parent loop directive there is nothing to emit.
5363 if (!OMPParentLoopDirectiveForScan)
5364 return;
5365 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
// The scan is inclusive if it has an 'inclusive' clause, exclusive otherwise.
5366 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5367 SmallVector<const Expr *, 4> Shareds;
5368 SmallVector<const Expr *, 4> Privates;
5369 SmallVector<const Expr *, 4> LHSs;
5370 SmallVector<const Expr *, 4> RHSs;
5371 SmallVector<const Expr *, 4> ReductionOps;
5372 SmallVector<const Expr *, 4> CopyOps;
5373 SmallVector<const Expr *, 4> CopyArrayTemps;
5374 SmallVector<const Expr *, 4> CopyArrayElems;
// Gather the expression lists of every reduction clause of the parent
// directive that has the 'inscan' modifier; other reduction clauses are
// skipped.
5375 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5376 if (C->getModifier() != OMPC_REDUCTION_inscan)
5377 continue;
5378 Shareds.append(C->varlist_begin(), C->varlist_end());
5379 Privates.append(C->privates().begin(), C->privates().end());
5380 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5381 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5382 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
5383 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
5384 CopyArrayTemps.append(C->copy_array_temps().begin(),
5385 C->copy_array_temps().end());
5386 CopyArrayElems.append(C->copy_array_elems().begin(),
5387 C->copy_array_elems().end());
5389 if (ParentDir.getDirectiveKind() == OMPD_simd ||
5390 (getLangOpts().OpenMPSimd &&
5391 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
5392 // For simd directive and simd-based directives in simd only mode, use the
5393 // following codegen:
5394 // int x = 0;
5395 // #pragma omp simd reduction(inscan, +: x)
5396 // for (..) {
5397 // <first part>
5398 // #pragma omp scan inclusive(x)
5399 // <second part>
5400 // }
5401 // is transformed to:
5402 // int x = 0;
5403 // for (..) {
5404 // int x_priv = 0;
5405 // <first part>
5406 // x = x_priv + x;
5407 // x_priv = x;
5408 // <second part>
5409 // }
5410 // and
5411 // int x = 0;
5412 // #pragma omp simd reduction(inscan, +: x)
5413 // for (..) {
5414 // <first part>
5415 // #pragma omp scan exclusive(x)
5416 // <second part>
5417 // }
5418 // to
5419 // int x = 0;
5420 // for (..) {
5421 // int x_priv = 0;
5422 // <second part>
5423 // int temp = x;
5424 // x = x_priv + x;
5425 // x_priv = temp;
5426 // <first part>
5427 // }
5428 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
// Leave the part emitted so far: go straight to the reduction block for an
// inclusive scan, or to the loop's continue block for an exclusive scan.
5429 EmitBranch(IsInclusive
5430 ? OMPScanReduce
5431 : BreakContinueStack.back().ContinueBlock.getBlock());
5432 EmitBlock(OMPScanDispatch);
5434 // New scope for correct construction/destruction of temp variables for
5435 // exclusive scan.
5436 LexicalScope Scope(*this, S.getSourceRange());
5437 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5438 EmitBlock(OMPScanReduce);
5439 if (!IsInclusive) {
5440 // Create temp var and copy LHS value to this temp value.
5441 // TMP = LHS;
5442 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5443 const Expr *PrivateExpr = Privates[I];
5444 const Expr *TempExpr = CopyArrayTemps[I];
5445 EmitAutoVarDecl(
5446 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
5447 LValue DestLVal = EmitLValue(TempExpr);
5448 LValue SrcLVal = EmitLValue(LHSs[I]);
5449 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5450 SrcLVal.getAddress(*this),
5451 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5452 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5453 CopyOps[I]);
// Combine the private values into the LHS items via the runtime's reduction
// emission, requested as a simple reduction for OMPD_simd.
5456 CGM.getOpenMPRuntime().emitReduction(
5457 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
5458 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
// Copy into RHS: from LHS for an inclusive scan, or from the temporary saved
// above for an exclusive scan.
5459 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5460 const Expr *PrivateExpr = Privates[I];
5461 LValue DestLVal;
5462 LValue SrcLVal;
5463 if (IsInclusive) {
5464 DestLVal = EmitLValue(RHSs[I]);
5465 SrcLVal = EmitLValue(LHSs[I]);
5466 } else {
5467 const Expr *TempExpr = CopyArrayTemps[I];
5468 DestLVal = EmitLValue(RHSs[I]);
5469 SrcLVal = EmitLValue(TempExpr);
5471 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5472 SrcLVal.getAddress(*this),
5473 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5474 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5475 CopyOps[I]);
5478 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
// Record the exit block of the scan region: the loop's continue block for an
// inclusive scan, the reduction block for an exclusive scan.
5479 OMPScanExitBlock = IsInclusive
5480 ? BreakContinueStack.back().ContinueBlock.getBlock()
5481 : OMPScanReduce;
5482 EmitBlock(OMPAfterScanBlock);
5483 return;
// Remaining parents: codegen depends on OMPFirstScanLoop, which selects
// between storing the reduction value into the copy array and loading it
// back from the copy array.
5485 if (!IsInclusive) {
5486 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5487 EmitBlock(OMPScanExitBlock);
5489 if (OMPFirstScanLoop) {
5490 // Emit buffer[i] = red; at the end of the input phase.
5491 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5492 .getIterationVariable()
5493 ->IgnoreParenImpCasts();
5494 LValue IdxLVal = EmitLValue(IVExpr);
5495 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
// The iteration variable value, converted to SizeTy as unsigned, is the index
// into the copy array.
5496 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5497 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5498 const Expr *PrivateExpr = Privates[I];
5499 const Expr *OrigExpr = Shareds[I];
5500 const Expr *CopyArrayElem = CopyArrayElems[I];
// Bind the opaque index of the copy-array element expression to IdxVal while
// the element lvalue is emitted.
5501 OpaqueValueMapping IdxMapping(
5502 *this,
5503 cast<OpaqueValueExpr>(
5504 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5505 RValue::get(IdxVal));
5506 LValue DestLVal = EmitLValue(CopyArrayElem);
5507 LValue SrcLVal = EmitLValue(OrigExpr);
5508 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5509 SrcLVal.getAddress(*this),
5510 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5511 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5512 CopyOps[I]);
5515 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5516 if (IsInclusive) {
5517 EmitBlock(OMPScanExitBlock);
5518 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5520 EmitBlock(OMPScanDispatch);
5521 if (!OMPFirstScanLoop) {
5522 // Emit red = buffer[i]; at the entrance to the scan phase.
5523 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5524 .getIterationVariable()
5525 ->IgnoreParenImpCasts();
5526 LValue IdxLVal = EmitLValue(IVExpr);
5527 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5528 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5529 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5530 if (!IsInclusive) {
// For an exclusive scan the copy is skipped entirely when the index is zero;
// otherwise the element at index - 1 is used.
5531 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
5532 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
5533 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
5534 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
5535 EmitBlock(ContBB);
5536 // Use idx - 1 iteration for exclusive scan.
5537 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
5539 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5540 const Expr *PrivateExpr = Privates[I];
5541 const Expr *OrigExpr = Shareds[I];
5542 const Expr *CopyArrayElem = CopyArrayElems[I];
5543 OpaqueValueMapping IdxMapping(
5544 *this,
5545 cast<OpaqueValueExpr>(
5546 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5547 RValue::get(IdxVal));
// Here the copy-array element is the source and the original item is the
// destination.
5548 LValue SrcLVal = EmitLValue(CopyArrayElem);
5549 LValue DestLVal = EmitLValue(OrigExpr);
5550 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5551 SrcLVal.getAddress(*this),
5552 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5553 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5554 CopyOps[I]);
5556 if (!IsInclusive) {
5557 EmitBlock(ExclusiveExitBB);
// Continue with the "before scan" block when OMPFirstScanLoop and
// IsInclusive agree, and with the "after scan" block otherwise.
5560 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5561 : OMPAfterScanBlock);
5562 EmitBlock(OMPAfterScanBlock);
/// Emit the loop of a 'distribute'-based loop directive \p S.
///
/// The function emits the iteration variable and (if it is a variable) the
/// last-iteration helper, evaluates the loop precondition, sets up the helper
/// bound/stride/is-last variables and privatized clause variables, and then
/// emits the loop itself: either a static-init based inner loop or, for all
/// other schedules, an outer loop via EmitOMPDistributeOuterLoop. Afterwards
/// it emits simd finalization, reduction finalization and lastprivate copies
/// where applicable.
///
/// \param S The loop directive being emitted.
/// \param CodeGenLoop Callback invoked as CodeGenLoop(CGF, S, LoopExit) to emit
/// the loop body.
/// \param IncExpr Increment expression handed to EmitOMPInnerLoop.
5565 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5566 const CodeGenLoopTy &CodeGenLoop,
5567 Expr *IncExpr) {
5568 // Emit the loop iteration variable.
5569 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5570 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5571 EmitVarDecl(*IVDecl);
5573 // Emit the iterations count variable.
5574 // If it is not a variable, Sema decided to calculate iterations count on each
5575 // iteration (e.g., it is foldable into a constant).
5576 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5577 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5578 // Emit calculation of the iterations count.
5579 EmitIgnoredExpr(S.getCalcLastIteration());
5582 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5584 bool HasLastprivateClause = false;
5585 // Check pre-condition.
5587 OMPLoopScope PreInitScope(*this, S);
5588 // Skip the entire loop if we don't meet the precondition.
5589 // If the condition constant folds and can be elided, avoid emitting the
5590 // whole loop.
5591 bool CondConstant;
// ContBlock stays null when the precondition folds to a constant; it is only
// created (and later branched to) when a runtime check is emitted.
5592 llvm::BasicBlock *ContBlock = nullptr;
5593 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5594 if (!CondConstant)
5595 return;
5596 } else {
5597 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5598 ContBlock = createBasicBlock("omp.precond.end");
5599 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5600 getProfileCount(&S));
5601 EmitBlock(ThenBlock);
5602 incrementProfileCounter(&S);
5605 emitAlignedClause(*this, S);
5606 // Emit 'then' code.
5608 // Emit helper vars inits.
// Directives for which isOpenMPLoopBoundSharingDirective() is true use the
// "combined" lower/upper bound helper variables; all others use the plain ones.
5610 LValue LB = EmitOMPHelperVar(
5611 *this, cast<DeclRefExpr>(
5612 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5613 ? S.getCombinedLowerBoundVariable()
5614 : S.getLowerBoundVariable())));
5615 LValue UB = EmitOMPHelperVar(
5616 *this, cast<DeclRefExpr>(
5617 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5618 ? S.getCombinedUpperBoundVariable()
5619 : S.getUpperBoundVariable())));
5620 LValue ST =
5621 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5622 LValue IL =
5623 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5625 OMPPrivateScope LoopScope(*this);
5626 if (EmitOMPFirstprivateClause(S, LoopScope)) {
5627 // Emit implicit barrier to synchronize threads and avoid data races
5628 // on initialization of firstprivate variables and post-update of
5629 // lastprivate variables.
5630 CGM.getOpenMPRuntime().emitBarrierCall(
5631 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5632 /*ForceSimpleCall=*/true);
5634 EmitOMPPrivateClause(S, LoopScope);
// Reduction init is emitted here only for simd directives that are neither
// parallel nor teams directives; the matching finalization below uses the
// same condition.
5635 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5636 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5637 !isOpenMPTeamsDirective(S.getDirectiveKind()))
5638 EmitOMPReductionClauseInit(S, LoopScope);
5639 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5640 EmitOMPPrivateLoopCounters(S, LoopScope);
5641 (void)LoopScope.Privatize();
5642 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5643 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5645 // Detect the distribute schedule kind and chunk.
5646 llvm::Value *Chunk = nullptr;
5647 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5648 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5649 ScheduleKind = C->getDistScheduleKind();
5650 if (const Expr *Ch = C->getChunkSize()) {
// The chunk size is converted to the type of the iteration variable.
5651 Chunk = EmitScalarExpr(Ch);
5652 Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5653 S.getIterationVariable()->getType(),
5654 S.getBeginLoc());
5656 } else {
5657 // Default behaviour for dist_schedule clause.
5658 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5659 *this, S, ScheduleKind, Chunk);
5661 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5662 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5664 // OpenMP [2.10.8, distribute Construct, Description]
5665 // If dist_schedule is specified, kind must be static. If specified,
5666 // iterations are divided into chunks of size chunk_size, chunks are
5667 // assigned to the teams of the league in a round-robin fashion in the
5668 // order of the team number. When no chunk_size is specified, the
5669 // iteration space is divided into chunks that are approximately equal
5670 // in size, and at most one chunk is distributed to each team of the
5671 // league. The size of the chunks is unspecified in this case.
// StaticChunked is only set for loop-bound-sharing directives; a static
// chunked schedule on any other directive takes the outer-loop path below.
5672 bool StaticChunked =
5673 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5674 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5675 if (RT.isStaticNonchunked(ScheduleKind,
5676 /* Chunked */ Chunk != nullptr) ||
5677 StaticChunked) {
5678 CGOpenMPRuntime::StaticRTInput StaticInit(
5679 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
5680 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5681 StaticChunked ? Chunk : nullptr);
5682 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5683 StaticInit);
5684 JumpDest LoopExit =
5685 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5686 // UB = min(UB, GlobalUB);
5687 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5688 ? S.getCombinedEnsureUpperBound()
5689 : S.getEnsureUpperBound());
5690 // IV = LB;
5691 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5692 ? S.getCombinedInit()
5693 : S.getInit());
5695 const Expr *Cond =
5696 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5697 ? S.getCombinedCond()
5698 : S.getCond();
// The static chunked case overrides the loop condition with the combined
// distribute condition.
5700 if (StaticChunked)
5701 Cond = S.getCombinedDistCond();
5703 // For static unchunked schedules generate:
5705 // 1. For distribute alone, codegen
5706 // while (idx <= UB) {
5707 // BODY;
5708 // ++idx;
5709 // }
5711 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5712 // while (idx <= UB) {
5713 // <CodeGen rest of pragma>(LB, UB);
5714 // idx += ST;
5715 // }
5717 // For static chunk one schedule generate:
5719 // while (IV <= GlobalUB) {
5720 // <CodeGen rest of pragma>(LB, UB);
5721 // LB += ST;
5722 // UB += ST;
5723 // UB = min(UB, GlobalUB);
5724 // IV = LB;
5725 // }
// First callback: simd initialization (simd directives only). Second
// callback: the inner loop; its post-increment step advances LB/UB and
// re-initializes IV when the schedule is static chunked.
5727 emitCommonSimdLoop(
5728 *this, S,
5729 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5730 if (isOpenMPSimdDirective(S.getDirectiveKind()))
5731 CGF.EmitOMPSimdInit(S);
5733 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5734 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5735 CGF.EmitOMPInnerLoop(
5736 S, LoopScope.requiresCleanups(), Cond, IncExpr,
5737 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5738 CodeGenLoop(CGF, S, LoopExit);
5740 [&S, StaticChunked](CodeGenFunction &CGF) {
5741 if (StaticChunked) {
5742 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5743 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5744 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5745 CGF.EmitIgnoredExpr(S.getCombinedInit());
5749 EmitBlock(LoopExit.getBlock());
5750 // Tell the runtime we are done.
5751 RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5752 } else {
5753 // Emit the outer loop, which requests its work chunk [LB..UB] from
5754 // runtime and runs the inner loop to process it.
5755 const OMPLoopArguments LoopArguments = {
5756 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5757 IL.getAddress(*this), Chunk};
5758 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5759 CodeGenLoop);
// The finalization callbacks below all test the is-last-iteration helper
// (IL) for a non-zero value.
5761 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5762 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5763 return CGF.Builder.CreateIsNotNull(
5764 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5767 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5768 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5769 !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5770 EmitOMPReductionClauseFinal(S, OMPD_simd);
5771 // Emit post-update of the reduction variables if IsLastIter != 0.
5772 emitPostUpdateForReductionClause(
5773 *this, S, [IL, &S](CodeGenFunction &CGF) {
5774 return CGF.Builder.CreateIsNotNull(
5775 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5778 // Emit final copy of the lastprivate variables if IsLastIter != 0.
5779 if (HasLastprivateClause) {
5780 EmitOMPLastprivateClauseFinal(
5781 S, /*NoFinals=*/false,
5782 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5786 // We're now done with the loop, so jump to the continuation block.
5787 if (ContBlock) {
5788 EmitBranch(ContBlock);
5789 EmitBlock(ContBlock, true);
5794 void CodeGenFunction::EmitOMPDistributeDirective(
5795 const OMPDistributeDirective &S) {
5796 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5797 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5799 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5800 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5803 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5804 const CapturedStmt *S,
5805 SourceLocation Loc) {
5806 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5807 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5808 CGF.CapturedStmtInfo = &CapStmtInfo;
5809 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5810 Fn->setDoesNotRecurse();
5811 return Fn;
/// Emit code for a '#pragma omp ordered' directive.
///
/// Two lowering paths exist:
///  - When LangOpts.OpenMPIRBuilder is set, the directive is lowered through
///    llvm::OpenMPIRBuilder: createOrderedDepend() for each 'depend' clause,
///    createOrderedThreadsSimd() otherwise.
///  - Otherwise it is lowered through CGOpenMPRuntime: emitDoacrossOrdered()
///    for each 'depend' clause, emitOrderedRegion() otherwise.
/// In both paths, when a 'simd' clause is present the region body is outlined
/// into a separate function and called; without it the captured statement is
/// emitted in place.
5814 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5815 if (CGM.getLangOpts().OpenMPIRBuilder) {
5816 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5817 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5819 if (S.hasClausesOfKind<OMPDependClause>()) {
5820 // The ordered directive with depend clause.
5821 assert(!S.hasAssociatedStmt() &&
5822 "No associated statement must be in ordered depend construct.");
5823 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5824 AllocaInsertPt->getIterator());
5825 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) {
5826 unsigned NumLoops = DC->getNumLoops();
5827 QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(
5828 /*DestWidth=*/64, /*Signed=*/1);
// Each loop counter value of the clause is converted to a signed 64-bit
// integer before being handed to the builder.
5829 llvm::SmallVector<llvm::Value *> StoreValues;
5830 for (unsigned I = 0; I < NumLoops; I++) {
5831 const Expr *CounterVal = DC->getLoopData(I);
5832 assert(CounterVal);
5833 llvm::Value *StoreValue = EmitScalarConversion(
5834 EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
5835 CounterVal->getExprLoc());
5836 StoreValues.emplace_back(StoreValue);
5838 bool IsDependSource = false;
5839 if (DC->getDependencyKind() == OMPC_DEPEND_source)
5840 IsDependSource = true;
5841 Builder.restoreIP(OMPBuilder.createOrderedDepend(
5842 Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr",
5843 IsDependSource));
5845 } else {
5846 // The ordered directive with threads or simd clause, or without clause.
5847 // Without clause, it behaves as if the threads clause is specified.
// C is non-null only when a 'simd' clause is present.
5848 const auto *C = S.getSingleClause<OMPSIMDClause>();
5850 auto FiniCB = [this](InsertPointTy IP) {
5851 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
5854 auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
5855 InsertPointTy CodeGenIP) {
5856 Builder.restoreIP(CodeGenIP);
5858 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5859 if (C) {
5860 llvm::BasicBlock *FiniBB = splitBBWithSuffix(
5861 Builder, /*CreateBranch=*/false, ".ordered.after");
5862 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5863 GenerateOpenMPCapturedVars(*CS, CapturedVars);
5864 llvm::Function *OutlinedFn =
5865 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5866 assert(S.getBeginLoc().isValid() &&
5867 "Outlined function call location must be valid.");
// NOTE(review): the object returned by CreateDefaultArtificial() is not bound
// to a name, so it is destroyed at the end of this statement. If
// ApplyDebugLocation is a scoped guard that restores the previous location
// on destruction, the artificial location is not in effect for the
// EmitCaptureStmt() call below -- confirm whether a named local was intended.
5868 ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
5869 OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
5870 OutlinedFn, CapturedVars);
5871 } else {
5872 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5873 *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
5877 OMPLexicalScope Scope(*this, S, OMPD_unknown);
// The last argument is true when no 'simd' clause is present.
5878 Builder.restoreIP(
5879 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
5881 return;
// From here on: lowering through CGOpenMPRuntime (OpenMPIRBuilder not enabled).
5884 if (S.hasClausesOfKind<OMPDependClause>()) {
5885 assert(!S.hasAssociatedStmt() &&
5886 "No associated statement must be in ordered depend construct.");
5887 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5888 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5889 return;
5891 const auto *C = S.getSingleClause<OMPSIMDClause>();
5892 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5893 PrePostActionTy &Action) {
5894 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5895 if (C) {
// With a 'simd' clause the body is outlined and called; Action.Enter() is
// only invoked on the inline path below.
5896 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5897 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5898 llvm::Function *OutlinedFn =
5899 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5900 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
5901 OutlinedFn, CapturedVars);
5902 } else {
5903 Action.Enter(CGF);
5904 CGF.EmitStmt(CS->getCapturedStmt());
5907 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5908 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
5911 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5912 QualType SrcType, QualType DestType,
5913 SourceLocation Loc) {
5914 assert(CGF.hasScalarEvaluationKind(DestType) &&
5915 "DestType must have scalar evaluation kind.");
5916 assert(!Val.isAggregate() && "Must be a scalar or complex.");
5917 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
5918 DestType, Loc)
5919 : CGF.EmitComplexToScalarConversion(
5920 Val.getComplexVal(), SrcType, DestType, Loc);
5923 static CodeGenFunction::ComplexPairTy
5924 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
5925 QualType DestType, SourceLocation Loc) {
5926 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
5927 "DestType must have complex evaluation kind.");
5928 CodeGenFunction::ComplexPairTy ComplexVal;
5929 if (Val.isScalar()) {
5930 // Convert the input element to the element type of the complex.
5931 QualType DestElementType =
5932 DestType->castAs<ComplexType>()->getElementType();
5933 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
5934 Val.getScalarVal(), SrcType, DestElementType, Loc);
5935 ComplexVal = CodeGenFunction::ComplexPairTy(
5936 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
5937 } else {
5938 assert(Val.isComplex() && "Must be a scalar or complex.");
5939 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
5940 QualType DestElementType =
5941 DestType->castAs<ComplexType>()->getElementType();
5942 ComplexVal.first = CGF.EmitScalarConversion(
5943 Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
5944 ComplexVal.second = CGF.EmitScalarConversion(
5945 Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
5947 return ComplexVal;
5950 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
5951 LValue LVal, RValue RVal) {
5952 if (LVal.isGlobalReg())
5953 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
5954 else
5955 CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
5958 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
5959 llvm::AtomicOrdering AO, LValue LVal,
5960 SourceLocation Loc) {
5961 if (LVal.isGlobalReg())
5962 return CGF.EmitLoadOfLValue(LVal, Loc);
5963 return CGF.EmitAtomicLoad(
5964 LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
5965 LVal.isVolatile());
5968 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
5969 QualType RValTy, SourceLocation Loc) {
5970 switch (getEvaluationKind(LVal.getType())) {
5971 case TEK_Scalar:
5972 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
5973 *this, RVal, RValTy, LVal.getType(), Loc)),
5974 LVal);
5975 break;
5976 case TEK_Complex:
5977 EmitStoreOfComplex(
5978 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
5979 /*isInit=*/false);
5980 break;
5981 case TEK_Aggregate:
5982 llvm_unreachable("Must be a scalar or complex.");
5986 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
5987 const Expr *X, const Expr *V,
5988 SourceLocation Loc) {
5989 // v = x;
5990 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
5991 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
5992 LValue XLValue = CGF.EmitLValue(X);
5993 LValue VLValue = CGF.EmitLValue(V);
5994 RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
5995 // OpenMP, 2.17.7, atomic Construct
5996 // If the read or capture clause is specified and the acquire, acq_rel, or
5997 // seq_cst clause is specified then the strong flush on exit from the atomic
5998 // operation is also an acquire flush.
5999 switch (AO) {
6000 case llvm::AtomicOrdering::Acquire:
6001 case llvm::AtomicOrdering::AcquireRelease:
6002 case llvm::AtomicOrdering::SequentiallyConsistent:
6003 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6004 llvm::AtomicOrdering::Acquire);
6005 break;
6006 case llvm::AtomicOrdering::Monotonic:
6007 case llvm::AtomicOrdering::Release:
6008 break;
6009 case llvm::AtomicOrdering::NotAtomic:
6010 case llvm::AtomicOrdering::Unordered:
6011 llvm_unreachable("Unexpected ordering.");
6013 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
6014 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6017 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6018 llvm::AtomicOrdering AO, const Expr *X,
6019 const Expr *E, SourceLocation Loc) {
6020 // x = expr;
6021 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6022 emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
6023 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6024 // OpenMP, 2.17.7, atomic Construct
6025 // If the write, update, or capture clause is specified and the release,
6026 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6027 // the atomic operation is also a release flush.
6028 switch (AO) {
6029 case llvm::AtomicOrdering::Release:
6030 case llvm::AtomicOrdering::AcquireRelease:
6031 case llvm::AtomicOrdering::SequentiallyConsistent:
6032 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6033 llvm::AtomicOrdering::Release);
6034 break;
6035 case llvm::AtomicOrdering::Acquire:
6036 case llvm::AtomicOrdering::Monotonic:
6037 break;
6038 case llvm::AtomicOrdering::NotAtomic:
6039 case llvm::AtomicOrdering::Unordered:
6040 llvm_unreachable("Unexpected ordering.");
/// Try to emit an atomic update of \p X with \p Update as a single 'atomicrmw'
/// instruction.
///
/// \param X               LValue being updated.
/// \param Update          Value combined with (or, for BO_Assign, exchanged
///                        with) the value of 'x'.
/// \param BO              Binary operator of the update expression.
/// \param AO              Atomic ordering used for the instruction.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \p BO; for
///                        BO_Sub the instruction is only used in that case,
///                        and for BO_LT/BO_GT it selects between min and max.
/// \returns {true, value produced by the atomicrmw instruction} when the
/// instruction was emitted, {false, RValue::get(nullptr)} when the operation
/// cannot be expressed this way.
6044 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6045 RValue Update,
6046 BinaryOperatorKind BO,
6047 llvm::AtomicOrdering AO,
6048 bool IsXLHSInRHSPart) {
6049 ASTContext &Context = CGF.getContext();
6050 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6051 // expression is simple and atomic is allowed for the given type for the
6052 // target platform.
// A non-constant update value must already have exactly the element type of
// 'x'; constant integers are cast to that type further below.
6053 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6054 (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
6055 (Update.getScalarVal()->getType() !=
6056 X.getAddress(CGF).getElementType())) ||
6057 !Context.getTargetInfo().hasBuiltinAtomic(
6058 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
6059 return std::make_pair(false, RValue::get(nullptr));
// Type filter: any integer type is accepted; floating-point types are accepted
// only for add/sub and only when their store size is a power of two.
6061 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6062 if (T->isIntegerTy())
6063 return true;
6065 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6066 return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
6068 return false;
6071 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6072 !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
6073 return std::make_pair(false, RValue::get(nullptr));
6075 bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
// Map the source-level operator to the atomicrmw operation.
6076 llvm::AtomicRMWInst::BinOp RMWOp;
6077 switch (BO) {
6078 case BO_Add:
6079 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6080 break;
6081 case BO_Sub:
// 'expr - x' is not handled by the sub operation; only 'x - expr' is.
6082 if (!IsXLHSInRHSPart)
6083 return std::make_pair(false, RValue::get(nullptr));
6084 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6085 break;
6086 case BO_And:
6087 RMWOp = llvm::AtomicRMWInst::And;
6088 break;
6089 case BO_Or:
6090 RMWOp = llvm::AtomicRMWInst::Or;
6091 break;
6092 case BO_Xor:
6093 RMWOp = llvm::AtomicRMWInst::Xor;
6094 break;
// For the comparison operators the operand position of 'x' selects between
// the min and max flavours, and the signedness of 'x' selects between the
// signed and unsigned integer variants.
6095 case BO_LT:
6096 if (IsInteger)
6097 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6098 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6099 : llvm::AtomicRMWInst::Max)
6100 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6101 : llvm::AtomicRMWInst::UMax);
6102 else
6103 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6104 : llvm::AtomicRMWInst::FMax;
6105 break;
6106 case BO_GT:
6107 if (IsInteger)
6108 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6109 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6110 : llvm::AtomicRMWInst::Min)
6111 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6112 : llvm::AtomicRMWInst::UMin);
6113 else
6114 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6115 : llvm::AtomicRMWInst::FMin;
6116 break;
6117 case BO_Assign:
6118 RMWOp = llvm::AtomicRMWInst::Xchg;
6119 break;
// Operators with no atomicrmw mapping here: report failure to the caller.
6120 case BO_Mul:
6121 case BO_Div:
6122 case BO_Rem:
6123 case BO_Shl:
6124 case BO_Shr:
6125 case BO_LAnd:
6126 case BO_LOr:
6127 return std::make_pair(false, RValue::get(nullptr));
// Operators that are treated as impossible for an atomic update expression.
6128 case BO_PtrMemD:
6129 case BO_PtrMemI:
6130 case BO_LE:
6131 case BO_GE:
6132 case BO_EQ:
6133 case BO_NE:
6134 case BO_Cmp:
6135 case BO_AddAssign:
6136 case BO_SubAssign:
6137 case BO_AndAssign:
6138 case BO_OrAssign:
6139 case BO_XorAssign:
6140 case BO_MulAssign:
6141 case BO_DivAssign:
6142 case BO_RemAssign:
6143 case BO_ShlAssign:
6144 case BO_ShrAssign:
6145 case BO_Comma:
6146 llvm_unreachable("Unsupported atomic update operation");
// A constant-integer update is cast to the element type of 'x': an integer
// cast using the signedness of 'x', or an int-to-FP cast when 'x' is
// floating point.
// NOTE(review): the floating-point branch always uses UIToFP, i.e. it treats
// the constant as unsigned -- confirm a negative constant cannot reach here.
6148 llvm::Value *UpdateVal = Update.getScalarVal();
6149 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
6150 if (IsInteger)
6151 UpdateVal = CGF.Builder.CreateIntCast(
6152 IC, X.getAddress(CGF).getElementType(),
6153 X.getType()->hasSignedIntegerRepresentation());
6154 else
6155 UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
6156 X.getAddress(CGF).getElementType());
6158 llvm::Value *Res =
6159 CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
6160 return std::make_pair(true, RValue::get(Res));
6163 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6164 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6165 llvm::AtomicOrdering AO, SourceLocation Loc,
6166 const llvm::function_ref<RValue(RValue)> CommonGen) {
6167 // Update expressions are allowed to have the following forms:
6168 // x binop= expr; -> xrval + expr;
6169 // x++, ++x -> xrval + 1;
6170 // x--, --x -> xrval - 1;
6171 // x = x binop expr; -> xrval binop expr
6172 // x = expr Op x; - > expr binop xrval;
6173 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
6174 if (!Res.first) {
6175 if (X.isGlobalReg()) {
6176 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6177 // 'xrval'.
6178 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
6179 } else {
6180 // Perform compare-and-swap procedure.
6181 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
6184 return Res;
6187 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6188 llvm::AtomicOrdering AO, const Expr *X,
6189 const Expr *E, const Expr *UE,
6190 bool IsXLHSInRHSPart, SourceLocation Loc) {
6191 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6192 "Update expr in 'atomic update' must be a binary operator.");
6193 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6194 // Update expressions are allowed to have the following forms:
6195 // x binop= expr; -> xrval + expr;
6196 // x++, ++x -> xrval + 1;
6197 // x--, --x -> xrval - 1;
6198 // x = x binop expr; -> xrval binop expr
6199 // x = expr Op x; - > expr binop xrval;
6200 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6201 LValue XLValue = CGF.EmitLValue(X);
6202 RValue ExprRValue = CGF.EmitAnyExpr(E);
6203 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6204 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6205 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6206 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6207 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6208 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6209 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6210 return CGF.EmitAnyExpr(UE);
6212 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6213 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6214 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6215 // OpenMP, 2.17.7, atomic Construct
6216 // If the write, update, or capture clause is specified and the release,
6217 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6218 // the atomic operation is also a release flush.
6219 switch (AO) {
6220 case llvm::AtomicOrdering::Release:
6221 case llvm::AtomicOrdering::AcquireRelease:
6222 case llvm::AtomicOrdering::SequentiallyConsistent:
6223 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6224 llvm::AtomicOrdering::Release);
6225 break;
6226 case llvm::AtomicOrdering::Acquire:
6227 case llvm::AtomicOrdering::Monotonic:
6228 break;
6229 case llvm::AtomicOrdering::NotAtomic:
6230 case llvm::AtomicOrdering::Unordered:
6231 llvm_unreachable("Unexpected ordering.");
6235 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6236 QualType SourceType, QualType ResType,
6237 SourceLocation Loc) {
6238 switch (CGF.getEvaluationKind(ResType)) {
6239 case TEK_Scalar:
6240 return RValue::get(
6241 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
6242 case TEK_Complex: {
6243 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
6244 return RValue::getComplex(Res.first, Res.second);
6246 case TEK_Aggregate:
6247 break;
6249 llvm_unreachable("Must be a scalar or complex.");
/// Emit '#pragma omp atomic capture': atomically update 'x' and store either
/// its old or its new value to 'v'.
///
/// \param AO              Atomic ordering of the operation.
/// \param IsPostfixUpdate If true, 'v' receives the value 'x' had before the
///                        update; otherwise it receives the updated value.
/// \param V               LValue expression that receives the captured value.
/// \param X               LValue expression that is updated atomically.
/// \param E               Expression combined with (or assigned to) 'x'.
/// \param UE              Update expression over opaque placeholders for 'x'
///                        and 'expr'; when null, 'x' is simply overwritten
///                        with the value of \p E.
/// \param IsXLHSInRHSPart True if 'x' is the left operand of \p UE.
/// \param Loc             Source location used for conversions and flushes.
6252 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6253 llvm::AtomicOrdering AO,
6254 bool IsPostfixUpdate, const Expr *V,
6255 const Expr *X, const Expr *E,
6256 const Expr *UE, bool IsXLHSInRHSPart,
6257 SourceLocation Loc) {
6258 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6259 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
// NewVVal is the value that is finally stored to 'v'; it is filled in either
// by the Gen callbacks below or from the atomicrmw result.
6260 RValue NewVVal;
6261 LValue VLValue = CGF.EmitLValue(V);
6262 LValue XLValue = CGF.EmitLValue(X);
6263 RValue ExprRValue = CGF.EmitAnyExpr(E);
6264 QualType NewVValType;
6265 if (UE) {
6266 // 'x' is updated with some additional value.
6267 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6268 "Update expr in 'atomic capture' must be a binary operator.");
6269 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6270 // Update expressions are allowed to have the following forms:
6271 // x binop= expr; -> xrval + expr;
6272 // x++, ++x -> xrval + 1;
6273 // x--, --x -> xrval - 1;
6274 // x = x binop expr; -> xrval binop expr
6275 // x = expr Op x; -> expr binop xrval;
6276 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6277 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6278 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6279 NewVValType = XRValExpr->getType();
6280 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
// Callback used when no atomicrmw can be emitted: computes the new value of
// 'x' from the old one and records the value to capture as a side effect.
6281 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6282 IsPostfixUpdate](RValue XRValue) {
6283 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6284 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6285 RValue Res = CGF.EmitAnyExpr(UE);
6286 NewVVal = IsPostfixUpdate ? XRValue : Res;
6287 return Res;
6289 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6290 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6291 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6292 if (Res.first) {
6293 // 'atomicrmw' instruction was generated.
6294 if (IsPostfixUpdate) {
6295 // Use old value from 'atomicrmw'.
6296 NewVVal = Res.second;
6297 } else {
6298 // 'atomicrmw' does not provide new value, so evaluate it using old
6299 // value of 'x'.
6300 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6301 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6302 NewVVal = CGF.EmitAnyExpr(UE);
6305 } else {
6306 // 'x' is simply rewritten with some 'expr'.
6307 NewVValType = X->getType().getNonReferenceType();
// 'expr' is converted to the (non-reference) type of 'x' before the exchange.
6308 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
6309 X->getType().getNonReferenceType(), Loc);
// Callback used when no atomicrmw can be emitted: captures the old value of
// 'x' and yields 'expr' as the new value.
6310 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6311 NewVVal = XRValue;
6312 return ExprRValue;
6314 // Try to perform atomicrmw xchg, otherwise simple exchange.
6315 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6316 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6317 Loc, Gen);
6318 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6319 if (Res.first) {
6320 // 'atomicrmw' instruction was generated.
6321 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6324 // Emit post-update store to 'v' of old/new 'x' value.
6325 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
6326 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6327 // OpenMP 5.1 removes the required flush for capture clause.
6328 if (CGF.CGM.getLangOpts().OpenMP < 51) {
6329 // OpenMP, 2.17.7, atomic Construct
6330 // If the write, update, or capture clause is specified and the release,
6331 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6332 // the atomic operation is also a release flush.
6333 // If the read or capture clause is specified and the acquire, acq_rel, or
6334 // seq_cst clause is specified then the strong flush on exit from the atomic
6335 // operation is also an acquire flush.
6336 switch (AO) {
6337 case llvm::AtomicOrdering::Release:
6338 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6339 llvm::AtomicOrdering::Release);
6340 break;
6341 case llvm::AtomicOrdering::Acquire:
6342 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6343 llvm::AtomicOrdering::Acquire);
6344 break;
// acq_rel and seq_cst both result in a single acquire-release flush.
6345 case llvm::AtomicOrdering::AcquireRelease:
6346 case llvm::AtomicOrdering::SequentiallyConsistent:
6347 CGF.CGM.getOpenMPRuntime().emitFlush(
6348 CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
6349 break;
6350 case llvm::AtomicOrdering::Monotonic:
6351 break;
6352 case llvm::AtomicOrdering::NotAtomic:
6353 case llvm::AtomicOrdering::Unordered:
6354 llvm_unreachable("Unexpected ordering.");
6359 static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
6360 llvm::AtomicOrdering AO, const Expr *X,
6361 const Expr *V, const Expr *R,
6362 const Expr *E, const Expr *D,
6363 const Expr *CE, bool IsXBinopExpr,
6364 bool IsPostfixUpdate, bool IsFailOnly,
6365 SourceLocation Loc) {
6366 llvm::OpenMPIRBuilder &OMPBuilder =
6367 CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6369 OMPAtomicCompareOp Op;
6370 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6371 switch (cast<BinaryOperator>(CE)->getOpcode()) {
6372 case BO_EQ:
6373 Op = OMPAtomicCompareOp::EQ;
6374 break;
6375 case BO_LT:
6376 Op = OMPAtomicCompareOp::MIN;
6377 break;
6378 case BO_GT:
6379 Op = OMPAtomicCompareOp::MAX;
6380 break;
6381 default:
6382 llvm_unreachable("unsupported atomic compare binary operator");
6385 LValue XLVal = CGF.EmitLValue(X);
6386 Address XAddr = XLVal.getAddress(CGF);
6388 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6389 if (X->getType() == E->getType())
6390 return CGF.EmitScalarExpr(E);
6391 const Expr *NewE = E->IgnoreImplicitAsWritten();
6392 llvm::Value *V = CGF.EmitScalarExpr(NewE);
6393 if (NewE->getType() == X->getType())
6394 return V;
6395 return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
6398 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6399 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6400 if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
6401 EVal = CGF.Builder.CreateIntCast(
6402 CI, XLVal.getAddress(CGF).getElementType(),
6403 E->getType()->hasSignedIntegerRepresentation());
6404 if (DVal)
6405 if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
6406 DVal = CGF.Builder.CreateIntCast(
6407 CI, XLVal.getAddress(CGF).getElementType(),
6408 D->getType()->hasSignedIntegerRepresentation());
6410 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6411 XAddr.getPointer(), XAddr.getElementType(),
6412 X->getType()->hasSignedIntegerRepresentation(),
6413 X->getType().isVolatileQualified()};
6414 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6415 if (V) {
6416 LValue LV = CGF.EmitLValue(V);
6417 Address Addr = LV.getAddress(CGF);
6418 VOpVal = {Addr.getPointer(), Addr.getElementType(),
6419 V->getType()->hasSignedIntegerRepresentation(),
6420 V->getType().isVolatileQualified()};
6422 if (R) {
6423 LValue LV = CGF.EmitLValue(R);
6424 Address Addr = LV.getAddress(CGF);
6425 ROpVal = {Addr.getPointer(), Addr.getElementType(),
6426 R->getType()->hasSignedIntegerRepresentation(),
6427 R->getType().isVolatileQualified()};
6430 CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6431 CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6432 IsPostfixUpdate, IsFailOnly));
6435 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6436 llvm::AtomicOrdering AO, bool IsPostfixUpdate,
6437 const Expr *X, const Expr *V, const Expr *R,
6438 const Expr *E, const Expr *UE, const Expr *D,
6439 const Expr *CE, bool IsXLHSInRHSPart,
6440 bool IsFailOnly, SourceLocation Loc) {
6441 switch (Kind) {
6442 case OMPC_read:
6443 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6444 break;
6445 case OMPC_write:
6446 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6447 break;
6448 case OMPC_unknown:
6449 case OMPC_update:
6450 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6451 break;
6452 case OMPC_capture:
6453 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6454 IsXLHSInRHSPart, Loc);
6455 break;
6456 case OMPC_compare: {
6457 emitOMPAtomicCompareExpr(CGF, AO, X, V, R, E, D, CE, IsXLHSInRHSPart,
6458 IsPostfixUpdate, IsFailOnly, Loc);
6459 break;
6461 default:
6462 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6466 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6467 llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
6468 bool MemOrderingSpecified = false;
6469 if (S.getSingleClause<OMPSeqCstClause>()) {
6470 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6471 MemOrderingSpecified = true;
6472 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6473 AO = llvm::AtomicOrdering::AcquireRelease;
6474 MemOrderingSpecified = true;
6475 } else if (S.getSingleClause<OMPAcquireClause>()) {
6476 AO = llvm::AtomicOrdering::Acquire;
6477 MemOrderingSpecified = true;
6478 } else if (S.getSingleClause<OMPReleaseClause>()) {
6479 AO = llvm::AtomicOrdering::Release;
6480 MemOrderingSpecified = true;
6481 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6482 AO = llvm::AtomicOrdering::Monotonic;
6483 MemOrderingSpecified = true;
6485 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6486 OpenMPClauseKind Kind = OMPC_unknown;
6487 for (const OMPClause *C : S.clauses()) {
6488 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
6489 // if it is first).
6490 OpenMPClauseKind K = C->getClauseKind();
6491 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6492 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6493 continue;
6494 Kind = K;
6495 KindsEncountered.insert(K);
6497 // We just need to correct Kind here. No need to set a bool saying it is
6498 // actually compare capture because we can tell from whether V and R are
6499 // nullptr.
6500 if (KindsEncountered.contains(OMPC_compare) &&
6501 KindsEncountered.contains(OMPC_capture))
6502 Kind = OMPC_compare;
6503 if (!MemOrderingSpecified) {
6504 llvm::AtomicOrdering DefaultOrder =
6505 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6506 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6507 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6508 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6509 Kind == OMPC_capture)) {
6510 AO = DefaultOrder;
6511 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6512 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6513 AO = llvm::AtomicOrdering::Release;
6514 } else if (Kind == OMPC_read) {
6515 assert(Kind == OMPC_read && "Unexpected atomic kind.");
6516 AO = llvm::AtomicOrdering::Acquire;
6521 LexicalScope Scope(*this, S.getSourceRange());
6522 EmitStopPoint(S.getAssociatedStmt());
6523 emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
6524 S.getR(), S.getExpr(), S.getUpdateExpr(), S.getD(),
6525 S.getCondExpr(), S.isXLHSInRHSPart(), S.isFailOnly(),
6526 S.getBeginLoc());
6529 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6530 const OMPExecutableDirective &S,
6531 const RegionCodeGenTy &CodeGen) {
6532 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6533 CodeGenModule &CGM = CGF.CGM;
6535 // On device emit this construct as inlined code.
6536 if (CGM.getLangOpts().OpenMPIsDevice) {
6537 OMPLexicalScope Scope(CGF, S, OMPD_target);
6538 CGM.getOpenMPRuntime().emitInlinedDirective(
6539 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6540 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6542 return;
6545 auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6546 llvm::Function *Fn = nullptr;
6547 llvm::Constant *FnID = nullptr;
6549 const Expr *IfCond = nullptr;
6550 // Check for the at most one if clause associated with the target region.
6551 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6552 if (C->getNameModifier() == OMPD_unknown ||
6553 C->getNameModifier() == OMPD_target) {
6554 IfCond = C->getCondition();
6555 break;
6559 // Check if we have any device clause associated with the directive.
6560 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6561 nullptr, OMPC_DEVICE_unknown);
6562 if (auto *C = S.getSingleClause<OMPDeviceClause>())
6563 Device.setPointerAndInt(C->getDevice(), C->getModifier());
6565 // Check if we have an if clause whose conditional always evaluates to false
6566 // or if we do not have any targets specified. If so the target region is not
6567 // an offload entry point.
6568 bool IsOffloadEntry = true;
6569 if (IfCond) {
6570 bool Val;
6571 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
6572 IsOffloadEntry = false;
6574 if (CGM.getLangOpts().OMPTargetTriples.empty())
6575 IsOffloadEntry = false;
6577 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6578 unsigned DiagID = CGM.getDiags().getCustomDiagID(
6579 DiagnosticsEngine::Error,
6580 "No offloading entry generated while offloading is mandatory.");
6581 CGM.getDiags().Report(DiagID);
6584 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6585 StringRef ParentName;
6586 // In case we have Ctors/Dtors we use the complete type variant to produce
6587 // the mangling of the device outlined kernel.
6588 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
6589 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
6590 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
6591 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
6592 else
6593 ParentName =
6594 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
6596 // Emit target region as a standalone region.
6597 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
6598 IsOffloadEntry, CodeGen);
6599 OMPLexicalScope Scope(CGF, S, OMPD_task);
6600 auto &&SizeEmitter =
6601 [IsOffloadEntry](CodeGenFunction &CGF,
6602 const OMPLoopDirective &D) -> llvm::Value * {
6603 if (IsOffloadEntry) {
6604 OMPLoopScope(CGF, D);
6605 // Emit calculation of the iterations count.
6606 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
6607 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
6608 /*isSigned=*/false);
6609 return NumIterations;
6611 return nullptr;
6613 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
6614 SizeEmitter);
6617 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6618 PrePostActionTy &Action) {
6619 Action.Enter(CGF);
6620 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6621 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6622 CGF.EmitOMPPrivateClause(S, PrivateScope);
6623 (void)PrivateScope.Privatize();
6624 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6625 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6627 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
6628 CGF.EnsureInsertPoint();
6631 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6632 StringRef ParentName,
6633 const OMPTargetDirective &S) {
6634 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6635 emitTargetRegion(CGF, S, Action);
6637 llvm::Function *Fn;
6638 llvm::Constant *Addr;
6639 // Emit target region as a standalone region.
6640 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6641 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6642 assert(Fn && Addr && "Target device function emission failed.");
6645 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6646 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6647 emitTargetRegion(CGF, S, Action);
6649 emitCommonOMPTargetDirective(*this, S, CodeGen);
6652 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6653 const OMPExecutableDirective &S,
6654 OpenMPDirectiveKind InnermostKind,
6655 const RegionCodeGenTy &CodeGen) {
6656 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
6657 llvm::Function *OutlinedFn =
6658 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6659 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
6661 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6662 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6663 if (NT || TL) {
6664 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6665 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6667 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
6668 S.getBeginLoc());
6671 OMPTeamsScope Scope(CGF, S);
6672 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6673 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6674 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
6675 CapturedVars);
6678 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6679 // Emit teams region as a standalone region.
6680 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6681 Action.Enter(CGF);
6682 OMPPrivateScope PrivateScope(CGF);
6683 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6684 CGF.EmitOMPPrivateClause(S, PrivateScope);
6685 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6686 (void)PrivateScope.Privatize();
6687 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
6688 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6690 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6691 emitPostUpdateForReductionClause(*this, S,
6692 [](CodeGenFunction &) { return nullptr; });
6695 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6696 const OMPTargetTeamsDirective &S) {
6697 auto *CS = S.getCapturedStmt(OMPD_teams);
6698 Action.Enter(CGF);
6699 // Emit teams region as a standalone region.
6700 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6701 Action.Enter(CGF);
6702 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6703 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6704 CGF.EmitOMPPrivateClause(S, PrivateScope);
6705 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6706 (void)PrivateScope.Privatize();
6707 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6708 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6709 CGF.EmitStmt(CS->getCapturedStmt());
6710 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6712 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
6713 emitPostUpdateForReductionClause(CGF, S,
6714 [](CodeGenFunction &) { return nullptr; });
6717 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6718 CodeGenModule &CGM, StringRef ParentName,
6719 const OMPTargetTeamsDirective &S) {
6720 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6721 emitTargetTeamsRegion(CGF, Action, S);
6723 llvm::Function *Fn;
6724 llvm::Constant *Addr;
6725 // Emit target region as a standalone region.
6726 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6727 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6728 assert(Fn && Addr && "Target device function emission failed.");
6731 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6732 const OMPTargetTeamsDirective &S) {
6733 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6734 emitTargetTeamsRegion(CGF, Action, S);
6736 emitCommonOMPTargetDirective(*this, S, CodeGen);
6739 static void
6740 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6741 const OMPTargetTeamsDistributeDirective &S) {
6742 Action.Enter(CGF);
6743 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6744 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6747 // Emit teams region as a standalone region.
6748 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6749 PrePostActionTy &Action) {
6750 Action.Enter(CGF);
6751 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6752 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6753 (void)PrivateScope.Privatize();
6754 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6755 CodeGenDistribute);
6756 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6758 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
6759 emitPostUpdateForReductionClause(CGF, S,
6760 [](CodeGenFunction &) { return nullptr; });
6763 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6764 CodeGenModule &CGM, StringRef ParentName,
6765 const OMPTargetTeamsDistributeDirective &S) {
6766 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6767 emitTargetTeamsDistributeRegion(CGF, Action, S);
6769 llvm::Function *Fn;
6770 llvm::Constant *Addr;
6771 // Emit target region as a standalone region.
6772 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6773 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6774 assert(Fn && Addr && "Target device function emission failed.");
6777 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6778 const OMPTargetTeamsDistributeDirective &S) {
6779 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6780 emitTargetTeamsDistributeRegion(CGF, Action, S);
6782 emitCommonOMPTargetDirective(*this, S, CodeGen);
6785 static void emitTargetTeamsDistributeSimdRegion(
6786 CodeGenFunction &CGF, PrePostActionTy &Action,
6787 const OMPTargetTeamsDistributeSimdDirective &S) {
6788 Action.Enter(CGF);
6789 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6790 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6793 // Emit teams region as a standalone region.
6794 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6795 PrePostActionTy &Action) {
6796 Action.Enter(CGF);
6797 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6798 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6799 (void)PrivateScope.Privatize();
6800 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6801 CodeGenDistribute);
6802 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6804 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
6805 emitPostUpdateForReductionClause(CGF, S,
6806 [](CodeGenFunction &) { return nullptr; });
6809 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6810 CodeGenModule &CGM, StringRef ParentName,
6811 const OMPTargetTeamsDistributeSimdDirective &S) {
6812 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6813 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6815 llvm::Function *Fn;
6816 llvm::Constant *Addr;
6817 // Emit target region as a standalone region.
6818 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6819 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6820 assert(Fn && Addr && "Target device function emission failed.");
6823 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6824 const OMPTargetTeamsDistributeSimdDirective &S) {
6825 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6826 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6828 emitCommonOMPTargetDirective(*this, S, CodeGen);
6831 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6832 const OMPTeamsDistributeDirective &S) {
6834 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6835 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6838 // Emit teams region as a standalone region.
6839 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6840 PrePostActionTy &Action) {
6841 Action.Enter(CGF);
6842 OMPPrivateScope PrivateScope(CGF);
6843 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6844 (void)PrivateScope.Privatize();
6845 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6846 CodeGenDistribute);
6847 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6849 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6850 emitPostUpdateForReductionClause(*this, S,
6851 [](CodeGenFunction &) { return nullptr; });
6854 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6855 const OMPTeamsDistributeSimdDirective &S) {
6856 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6857 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6860 // Emit teams region as a standalone region.
6861 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6862 PrePostActionTy &Action) {
6863 Action.Enter(CGF);
6864 OMPPrivateScope PrivateScope(CGF);
6865 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6866 (void)PrivateScope.Privatize();
6867 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
6868 CodeGenDistribute);
6869 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6871 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
6872 emitPostUpdateForReductionClause(*this, S,
6873 [](CodeGenFunction &) { return nullptr; });
6876 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6877 const OMPTeamsDistributeParallelForDirective &S) {
6878 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6879 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6880 S.getDistInc());
6883 // Emit teams region as a standalone region.
6884 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6885 PrePostActionTy &Action) {
6886 Action.Enter(CGF);
6887 OMPPrivateScope PrivateScope(CGF);
6888 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6889 (void)PrivateScope.Privatize();
6890 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6891 CodeGenDistribute);
6892 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6894 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
6895 emitPostUpdateForReductionClause(*this, S,
6896 [](CodeGenFunction &) { return nullptr; });
6899 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
6900 const OMPTeamsDistributeParallelForSimdDirective &S) {
6901 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6902 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6903 S.getDistInc());
6906 // Emit teams region as a standalone region.
6907 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6908 PrePostActionTy &Action) {
6909 Action.Enter(CGF);
6910 OMPPrivateScope PrivateScope(CGF);
6911 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6912 (void)PrivateScope.Privatize();
6913 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6914 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6915 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6917 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
6918 CodeGen);
6919 emitPostUpdateForReductionClause(*this, S,
6920 [](CodeGenFunction &) { return nullptr; });
6923 void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
6924 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6925 llvm::Value *Device = nullptr;
6926 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6927 Device = EmitScalarExpr(C->getDevice());
6929 llvm::Value *NumDependences = nullptr;
6930 llvm::Value *DependenceAddress = nullptr;
6931 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
6932 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
6933 DC->getModifier());
6934 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
6935 std::pair<llvm::Value *, Address> DependencePair =
6936 CGM.getOpenMPRuntime().emitDependClause(*this, Dependencies,
6937 DC->getBeginLoc());
6938 NumDependences = DependencePair.first;
6939 DependenceAddress = Builder.CreatePointerCast(
6940 DependencePair.second.getPointer(), CGM.Int8PtrTy);
6943 assert(!(S.hasClausesOfKind<OMPNowaitClause>() &&
6944 !(S.getSingleClause<OMPInitClause>() ||
6945 S.getSingleClause<OMPDestroyClause>() ||
6946 S.getSingleClause<OMPUseClause>())) &&
6947 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
6949 if (const auto *C = S.getSingleClause<OMPInitClause>()) {
6950 llvm::Value *InteropvarPtr =
6951 EmitLValue(C->getInteropVar()).getPointer(*this);
6952 llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
6953 if (C->getIsTarget()) {
6954 InteropType = llvm::omp::OMPInteropType::Target;
6955 } else {
6956 assert(C->getIsTargetSync() && "Expected interop-type target/targetsync");
6957 InteropType = llvm::omp::OMPInteropType::TargetSync;
6959 OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
6960 NumDependences, DependenceAddress,
6961 S.hasClausesOfKind<OMPNowaitClause>());
6962 } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
6963 llvm::Value *InteropvarPtr =
6964 EmitLValue(C->getInteropVar()).getPointer(*this);
6965 OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
6966 NumDependences, DependenceAddress,
6967 S.hasClausesOfKind<OMPNowaitClause>());
6968 } else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
6969 llvm::Value *InteropvarPtr =
6970 EmitLValue(C->getInteropVar()).getPointer(*this);
6971 OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
6972 NumDependences, DependenceAddress,
6973 S.hasClausesOfKind<OMPNowaitClause>());
6977 static void emitTargetTeamsDistributeParallelForRegion(
6978 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
6979 PrePostActionTy &Action) {
6980 Action.Enter(CGF);
6981 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6982 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6983 S.getDistInc());
6986 // Emit teams region as a standalone region.
6987 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6988 PrePostActionTy &Action) {
6989 Action.Enter(CGF);
6990 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6991 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6992 (void)PrivateScope.Privatize();
6993 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6994 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6995 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6998 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
6999 CodeGenTeams);
7000 emitPostUpdateForReductionClause(CGF, S,
7001 [](CodeGenFunction &) { return nullptr; });
7004 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7005 CodeGenModule &CGM, StringRef ParentName,
7006 const OMPTargetTeamsDistributeParallelForDirective &S) {
7007 // Emit SPMD target teams distribute parallel for region as a standalone
7008 // region.
7009 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7010 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7012 llvm::Function *Fn;
7013 llvm::Constant *Addr;
7014 // Emit target region as a standalone region.
7015 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7016 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7017 assert(Fn && Addr && "Target device function emission failed.");
7020 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7021 const OMPTargetTeamsDistributeParallelForDirective &S) {
7022 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7023 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7025 emitCommonOMPTargetDirective(*this, S, CodeGen);
7028 static void emitTargetTeamsDistributeParallelForSimdRegion(
7029 CodeGenFunction &CGF,
7030 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7031 PrePostActionTy &Action) {
7032 Action.Enter(CGF);
7033 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7034 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7035 S.getDistInc());
7038 // Emit teams region as a standalone region.
7039 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7040 PrePostActionTy &Action) {
7041 Action.Enter(CGF);
7042 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7043 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7044 (void)PrivateScope.Privatize();
7045 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7046 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7047 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7050 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
7051 CodeGenTeams);
7052 emitPostUpdateForReductionClause(CGF, S,
7053 [](CodeGenFunction &) { return nullptr; });
7056 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7057 CodeGenModule &CGM, StringRef ParentName,
7058 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7059 // Emit SPMD target teams distribute parallel for simd region as a standalone
7060 // region.
7061 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7062 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7064 llvm::Function *Fn;
7065 llvm::Constant *Addr;
7066 // Emit target region as a standalone region.
7067 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7068 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7069 assert(Fn && Addr && "Target device function emission failed.");
7072 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7073 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7074 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7075 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7077 emitCommonOMPTargetDirective(*this, S, CodeGen);
7080 void CodeGenFunction::EmitOMPCancellationPointDirective(
7081 const OMPCancellationPointDirective &S) {
7082 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7083 S.getCancelRegion());
7086 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7087 const Expr *IfCond = nullptr;
7088 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7089 if (C->getNameModifier() == OMPD_unknown ||
7090 C->getNameModifier() == OMPD_cancel) {
7091 IfCond = C->getCondition();
7092 break;
7095 if (CGM.getLangOpts().OpenMPIRBuilder) {
7096 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7097 // TODO: This check is necessary as we only generate `omp parallel` through
7098 // the OpenMPIRBuilder for now.
7099 if (S.getCancelRegion() == OMPD_parallel ||
7100 S.getCancelRegion() == OMPD_sections ||
7101 S.getCancelRegion() == OMPD_section) {
7102 llvm::Value *IfCondition = nullptr;
7103 if (IfCond)
7104 IfCondition = EmitScalarExpr(IfCond,
7105 /*IgnoreResultAssign=*/true);
7106 return Builder.restoreIP(
7107 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
7111 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
7112 S.getCancelRegion());
7115 CodeGenFunction::JumpDest
7116 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7117 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7118 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7119 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7120 return ReturnBlock;
7121 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7122 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7123 Kind == OMPD_distribute_parallel_for ||
7124 Kind == OMPD_target_parallel_for ||
7125 Kind == OMPD_teams_distribute_parallel_for ||
7126 Kind == OMPD_target_teams_distribute_parallel_for);
7127 return OMPCancelStack.getExitBlock();
7130 void CodeGenFunction::EmitOMPUseDevicePtrClause(
7131 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7132 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
7133 auto OrigVarIt = C.varlist_begin();
7134 auto InitIt = C.inits().begin();
7135 for (const Expr *PvtVarIt : C.private_copies()) {
7136 const auto *OrigVD =
7137 cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
7138 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
7139 const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
7141 // In order to identify the right initializer we need to match the
7142 // declaration used by the mapping logic. In some cases we may get
7143 // OMPCapturedExprDecl that refers to the original declaration.
7144 const ValueDecl *MatchingVD = OrigVD;
7145 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7146 // OMPCapturedExprDecl are used to privative fields of the current
7147 // structure.
7148 const auto *ME = cast<MemberExpr>(OED->getInit());
7149 assert(isa<CXXThisExpr>(ME->getBase()) &&
7150 "Base should be the current struct!");
7151 MatchingVD = ME->getMemberDecl();
7154 // If we don't have information about the current list item, move on to
7155 // the next one.
7156 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7157 if (InitAddrIt == CaptureDeviceAddrMap.end())
7158 continue;
7160 // Initialize the temporary initialization variable with the address
7161 // we get from the runtime library. We have to cast the source address
7162 // because it is always a void *. References are materialized in the
7163 // privatization scope, so the initialization here disregards the fact
7164 // the original variable is a reference.
7165 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7166 Address InitAddr = Builder.CreateElementBitCast(InitAddrIt->second, Ty);
7167 setAddrOfLocalVar(InitVD, InitAddr);
7169 // Emit private declaration, it will be initialized by the value we
7170 // declaration we just added to the local declarations map.
7171 EmitDecl(*PvtVD);
7173 // The initialization variables reached its purpose in the emission
7174 // of the previous declaration, so we don't need it anymore.
7175 LocalDeclMap.erase(InitVD);
7177 // Return the address of the private variable.
7178 bool IsRegistered =
7179 PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(PvtVD));
7180 assert(IsRegistered && "firstprivate var already registered as private");
7181 // Silence the warning about unused variable.
7182 (void)IsRegistered;
7184 ++OrigVarIt;
7185 ++InitIt;
7189 static const VarDecl *getBaseDecl(const Expr *Ref) {
7190 const Expr *Base = Ref->IgnoreParenImpCasts();
7191 while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
7192 Base = OASE->getBase()->IgnoreParenImpCasts();
7193 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7194 Base = ASE->getBase()->IgnoreParenImpCasts();
7195 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7198 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7199 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7200 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
7201 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7202 for (const Expr *Ref : C.varlists()) {
7203 const VarDecl *OrigVD = getBaseDecl(Ref);
7204 if (!Processed.insert(OrigVD).second)
7205 continue;
7206 // In order to identify the right initializer we need to match the
7207 // declaration used by the mapping logic. In some cases we may get
7208 // OMPCapturedExprDecl that refers to the original declaration.
7209 const ValueDecl *MatchingVD = OrigVD;
7210 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7211 // OMPCapturedExprDecl are used to privative fields of the current
7212 // structure.
7213 const auto *ME = cast<MemberExpr>(OED->getInit());
7214 assert(isa<CXXThisExpr>(ME->getBase()) &&
7215 "Base should be the current struct!");
7216 MatchingVD = ME->getMemberDecl();
7219 // If we don't have information about the current list item, move on to
7220 // the next one.
7221 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7222 if (InitAddrIt == CaptureDeviceAddrMap.end())
7223 continue;
7225 Address PrivAddr = InitAddrIt->getSecond();
7226 // For declrefs and variable length array need to load the pointer for
7227 // correct mapping, since the pointer to the data was passed to the runtime.
7228 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
7229 MatchingVD->getType()->isArrayType()) {
7230 QualType PtrTy = getContext().getPointerType(
7231 OrigVD->getType().getNonReferenceType());
7232 PrivAddr = EmitLoadOfPointer(
7233 Builder.CreateElementBitCast(PrivAddr, ConvertTypeForMem(PtrTy)),
7234 PtrTy->castAs<PointerType>());
7237 (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
7241 // Generate the instructions for '#pragma omp target data' directive.
7242 void CodeGenFunction::EmitOMPTargetDataDirective(
7243 const OMPTargetDataDirective &S) {
7244 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7245 /*SeparateBeginEndCalls=*/true);
7247 // Create a pre/post action to signal the privatization of the device pointer.
7248 // This action can be replaced by the OpenMP runtime code generation to
7249 // deactivate privatization.
7250 bool PrivatizeDevicePointers = false;
7251 class DevicePointerPrivActionTy : public PrePostActionTy {
7252 bool &PrivatizeDevicePointers;
7254 public:
7255 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7256 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7257 void Enter(CodeGenFunction &CGF) override {
7258 PrivatizeDevicePointers = true;
7261 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7263 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
7264 CodeGenFunction &CGF, PrePostActionTy &Action) {
7265 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7266 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
7269 // Codegen that selects whether to generate the privatization code or not.
7270 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
7271 &InnermostCodeGen](CodeGenFunction &CGF,
7272 PrePostActionTy &Action) {
7273 RegionCodeGenTy RCG(InnermostCodeGen);
7274 PrivatizeDevicePointers = false;
7276 // Call the pre-action to change the status of PrivatizeDevicePointers if
7277 // needed.
7278 Action.Enter(CGF);
7280 if (PrivatizeDevicePointers) {
7281 OMPPrivateScope PrivateScope(CGF);
7282 // Emit all instances of the use_device_ptr clause.
7283 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7284 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
7285 Info.CaptureDeviceAddrMap);
7286 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7287 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
7288 Info.CaptureDeviceAddrMap);
7289 (void)PrivateScope.Privatize();
7290 RCG(CGF);
7291 } else {
7292 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
7293 RCG(CGF);
7297 // Forward the provided action to the privatization codegen.
7298 RegionCodeGenTy PrivRCG(PrivCodeGen);
7299 PrivRCG.setAction(Action);
7301 // Notwithstanding the body of the region is emitted as inlined directive,
7302 // we don't use an inline scope as changes in the references inside the
7303 // region are expected to be visible outside, so we do not privative them.
7304 OMPLexicalScope Scope(CGF, S);
7305 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
7306 PrivRCG);
7309 RegionCodeGenTy RCG(CodeGen);
7311 // If we don't have target devices, don't bother emitting the data mapping
7312 // code.
7313 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7314 RCG(*this);
7315 return;
7318 // Check if we have any if clause associated with the directive.
7319 const Expr *IfCond = nullptr;
7320 if (const auto *C = S.getSingleClause<OMPIfClause>())
7321 IfCond = C->getCondition();
7323 // Check if we have any device clause associated with the directive.
7324 const Expr *Device = nullptr;
7325 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7326 Device = C->getDevice();
7328 // Set the action to signal privatization of device pointers.
7329 RCG.setAction(PrivAction);
7331 // Emit region code.
7332 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
7333 Info);
7336 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7337 const OMPTargetEnterDataDirective &S) {
7338 // If we don't have target devices, don't bother emitting the data mapping
7339 // code.
7340 if (CGM.getLangOpts().OMPTargetTriples.empty())
7341 return;
7343 // Check if we have any if clause associated with the directive.
7344 const Expr *IfCond = nullptr;
7345 if (const auto *C = S.getSingleClause<OMPIfClause>())
7346 IfCond = C->getCondition();
7348 // Check if we have any device clause associated with the directive.
7349 const Expr *Device = nullptr;
7350 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7351 Device = C->getDevice();
7353 OMPLexicalScope Scope(*this, S, OMPD_task);
7354 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7357 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7358 const OMPTargetExitDataDirective &S) {
7359 // If we don't have target devices, don't bother emitting the data mapping
7360 // code.
7361 if (CGM.getLangOpts().OMPTargetTriples.empty())
7362 return;
7364 // Check if we have any if clause associated with the directive.
7365 const Expr *IfCond = nullptr;
7366 if (const auto *C = S.getSingleClause<OMPIfClause>())
7367 IfCond = C->getCondition();
7369 // Check if we have any device clause associated with the directive.
7370 const Expr *Device = nullptr;
7371 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7372 Device = C->getDevice();
7374 OMPLexicalScope Scope(*this, S, OMPD_task);
7375 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7378 static void emitTargetParallelRegion(CodeGenFunction &CGF,
7379 const OMPTargetParallelDirective &S,
7380 PrePostActionTy &Action) {
7381 // Get the captured statement associated with the 'parallel' region.
7382 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
7383 Action.Enter(CGF);
7384 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7385 Action.Enter(CGF);
7386 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7387 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7388 CGF.EmitOMPPrivateClause(S, PrivateScope);
7389 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7390 (void)PrivateScope.Privatize();
7391 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
7392 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
7393 // TODO: Add support for clauses.
7394 CGF.EmitStmt(CS->getCapturedStmt());
7395 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
7397 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
7398 emitEmptyBoundParameters);
7399 emitPostUpdateForReductionClause(CGF, S,
7400 [](CodeGenFunction &) { return nullptr; });
7403 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7404 CodeGenModule &CGM, StringRef ParentName,
7405 const OMPTargetParallelDirective &S) {
7406 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7407 emitTargetParallelRegion(CGF, S, Action);
7409 llvm::Function *Fn;
7410 llvm::Constant *Addr;
7411 // Emit target region as a standalone region.
7412 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7413 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7414 assert(Fn && Addr && "Target device function emission failed.");
7417 void CodeGenFunction::EmitOMPTargetParallelDirective(
7418 const OMPTargetParallelDirective &S) {
7419 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7420 emitTargetParallelRegion(CGF, S, Action);
7422 emitCommonOMPTargetDirective(*this, S, CodeGen);
7425 static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7426 const OMPTargetParallelForDirective &S,
7427 PrePostActionTy &Action) {
7428 Action.Enter(CGF);
7429 // Emit directive as a combined directive that consists of two implicit
7430 // directives: 'parallel' with 'for' directive.
7431 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7432 Action.Enter(CGF);
7433 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7434 CGF, OMPD_target_parallel_for, S.hasCancel());
7435 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7436 emitDispatchForLoopBounds);
7438 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
7439 emitEmptyBoundParameters);
7442 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7443 CodeGenModule &CGM, StringRef ParentName,
7444 const OMPTargetParallelForDirective &S) {
7445 // Emit SPMD target parallel for region as a standalone region.
7446 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7447 emitTargetParallelForRegion(CGF, S, Action);
7449 llvm::Function *Fn;
7450 llvm::Constant *Addr;
7451 // Emit target region as a standalone region.
7452 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7453 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7454 assert(Fn && Addr && "Target device function emission failed.");
7457 void CodeGenFunction::EmitOMPTargetParallelForDirective(
7458 const OMPTargetParallelForDirective &S) {
7459 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7460 emitTargetParallelForRegion(CGF, S, Action);
7462 emitCommonOMPTargetDirective(*this, S, CodeGen);
7465 static void
7466 emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7467 const OMPTargetParallelForSimdDirective &S,
7468 PrePostActionTy &Action) {
7469 Action.Enter(CGF);
7470 // Emit directive as a combined directive that consists of two implicit
7471 // directives: 'parallel' with 'for' directive.
7472 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7473 Action.Enter(CGF);
7474 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7475 emitDispatchForLoopBounds);
7477 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
7478 emitEmptyBoundParameters);
7481 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7482 CodeGenModule &CGM, StringRef ParentName,
7483 const OMPTargetParallelForSimdDirective &S) {
7484 // Emit SPMD target parallel for region as a standalone region.
7485 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7486 emitTargetParallelForSimdRegion(CGF, S, Action);
7488 llvm::Function *Fn;
7489 llvm::Constant *Addr;
7490 // Emit target region as a standalone region.
7491 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7492 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7493 assert(Fn && Addr && "Target device function emission failed.");
7496 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7497 const OMPTargetParallelForSimdDirective &S) {
7498 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7499 emitTargetParallelForSimdRegion(CGF, S, Action);
7501 emitCommonOMPTargetDirective(*this, S, CodeGen);
7504 /// Emit a helper variable and return corresponding lvalue.
7505 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7506 const ImplicitParamDecl *PVD,
7507 CodeGenFunction::OMPPrivateScope &Privates) {
7508 const auto *VDecl = cast<VarDecl>(Helper->getDecl());
7509 Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
7512 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7513 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7514 // Emit outlined function for task construct.
7515 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
7516 Address CapturedStruct = Address::invalid();
7518 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7519 CapturedStruct = GenerateCapturedStmtArgument(*CS);
7521 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
7522 const Expr *IfCond = nullptr;
7523 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7524 if (C->getNameModifier() == OMPD_unknown ||
7525 C->getNameModifier() == OMPD_taskloop) {
7526 IfCond = C->getCondition();
7527 break;
7531 OMPTaskDataTy Data;
7532 // Check if taskloop must be emitted without taskgroup.
7533 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7534 // TODO: Check if we should emit tied or untied task.
7535 Data.Tied = true;
7536 // Set scheduling for taskloop
7537 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7538 // grainsize clause
7539 Data.Schedule.setInt(/*IntVal=*/false);
7540 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7541 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7542 // num_tasks clause
7543 Data.Schedule.setInt(/*IntVal=*/true);
7544 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7547 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7548 // if (PreCond) {
7549 // for (IV in 0..LastIteration) BODY;
7550 // <Final counter/linear vars updates>;
7551 // }
7554 // Emit: if (PreCond) - begin.
7555 // If the condition constant folds and can be elided, avoid emitting the
7556 // whole loop.
7557 bool CondConstant;
7558 llvm::BasicBlock *ContBlock = nullptr;
7559 OMPLoopScope PreInitScope(CGF, S);
7560 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
7561 if (!CondConstant)
7562 return;
7563 } else {
7564 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
7565 ContBlock = CGF.createBasicBlock("taskloop.if.end");
7566 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
7567 CGF.getProfileCount(&S));
7568 CGF.EmitBlock(ThenBlock);
7569 CGF.incrementProfileCounter(&S);
7572 (void)CGF.EmitOMPLinearClauseInit(S);
7574 OMPPrivateScope LoopScope(CGF);
7575 // Emit helper vars inits.
7576 enum { LowerBound = 5, UpperBound, Stride, LastIter };
7577 auto *I = CS->getCapturedDecl()->param_begin();
7578 auto *LBP = std::next(I, LowerBound);
7579 auto *UBP = std::next(I, UpperBound);
7580 auto *STP = std::next(I, Stride);
7581 auto *LIP = std::next(I, LastIter);
7582 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
7583 LoopScope);
7584 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
7585 LoopScope);
7586 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
7587 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
7588 LoopScope);
7589 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7590 CGF.EmitOMPLinearClause(S, LoopScope);
7591 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
7592 (void)LoopScope.Privatize();
7593 // Emit the loop iteration variable.
7594 const Expr *IVExpr = S.getIterationVariable();
7595 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
7596 CGF.EmitVarDecl(*IVDecl);
7597 CGF.EmitIgnoredExpr(S.getInit());
7599 // Emit the iterations count variable.
7600 // If it is not a variable, Sema decided to calculate iterations count on
7601 // each iteration (e.g., it is foldable into a constant).
7602 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
7603 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
7604 // Emit calculation of the iterations count.
7605 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
7609 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7610 emitCommonSimdLoop(
7611 CGF, S,
7612 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7613 if (isOpenMPSimdDirective(S.getDirectiveKind()))
7614 CGF.EmitOMPSimdInit(S);
7616 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
7617 CGF.EmitOMPInnerLoop(
7618 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
7619 [&S](CodeGenFunction &CGF) {
7620 emitOMPLoopBodyWithStopPoint(CGF, S,
7621 CodeGenFunction::JumpDest());
7623 [](CodeGenFunction &) {});
7626 // Emit: if (PreCond) - end.
7627 if (ContBlock) {
7628 CGF.EmitBranch(ContBlock);
7629 CGF.EmitBlock(ContBlock, true);
7631 // Emit final copy of the lastprivate variables if IsLastIter != 0.
7632 if (HasLastprivateClause) {
7633 CGF.EmitOMPLastprivateClauseFinal(
7634 S, isOpenMPSimdDirective(S.getDirectiveKind()),
7635 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
7636 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7637 (*LIP)->getType(), S.getBeginLoc())));
7639 LoopScope.restoreMap();
7640 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
7641 return CGF.Builder.CreateIsNotNull(
7642 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7643 (*LIP)->getType(), S.getBeginLoc()));
7646 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
7647 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
7648 const OMPTaskDataTy &Data) {
7649 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
7650 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
7651 OMPLoopScope PreInitScope(CGF, S);
7652 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
7653 OutlinedFn, SharedsTy,
7654 CapturedStruct, IfCond, Data);
7656 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
7657 CodeGen);
7659 if (Data.Nogroup) {
7660 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
7661 } else {
7662 CGM.getOpenMPRuntime().emitTaskgroupRegion(
7663 *this,
7664 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
7665 PrePostActionTy &Action) {
7666 Action.Enter(CGF);
7667 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
7668 Data);
7670 S.getBeginLoc());
7674 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
7675 auto LPCRegion =
7676 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7677 EmitOMPTaskLoopBasedDirective(S);
7680 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7681 const OMPTaskLoopSimdDirective &S) {
7682 auto LPCRegion =
7683 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7684 OMPLexicalScope Scope(*this, S);
7685 EmitOMPTaskLoopBasedDirective(S);
7688 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
7689 const OMPMasterTaskLoopDirective &S) {
7690 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7691 Action.Enter(CGF);
7692 EmitOMPTaskLoopBasedDirective(S);
7694 auto LPCRegion =
7695 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7696 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
7697 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7700 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
7701 const OMPMasterTaskLoopSimdDirective &S) {
7702 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7703 Action.Enter(CGF);
7704 EmitOMPTaskLoopBasedDirective(S);
7706 auto LPCRegion =
7707 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7708 OMPLexicalScope Scope(*this, S);
7709 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7712 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
7713 const OMPParallelMasterTaskLoopDirective &S) {
7714 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7715 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7716 PrePostActionTy &Action) {
7717 Action.Enter(CGF);
7718 CGF.EmitOMPTaskLoopBasedDirective(S);
7720 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7721 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7722 S.getBeginLoc());
7724 auto LPCRegion =
7725 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7726 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
7727 emitEmptyBoundParameters);
7730 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7731 const OMPParallelMasterTaskLoopSimdDirective &S) {
7732 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7733 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7734 PrePostActionTy &Action) {
7735 Action.Enter(CGF);
7736 CGF.EmitOMPTaskLoopBasedDirective(S);
7738 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7739 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7740 S.getBeginLoc());
7742 auto LPCRegion =
7743 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7744 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
7745 emitEmptyBoundParameters);
7748 // Generate the instructions for '#pragma omp target update' directive.
7749 void CodeGenFunction::EmitOMPTargetUpdateDirective(
7750 const OMPTargetUpdateDirective &S) {
7751 // If we don't have target devices, don't bother emitting the data mapping
7752 // code.
7753 if (CGM.getLangOpts().OMPTargetTriples.empty())
7754 return;
7756 // Check if we have any if clause associated with the directive.
7757 const Expr *IfCond = nullptr;
7758 if (const auto *C = S.getSingleClause<OMPIfClause>())
7759 IfCond = C->getCondition();
7761 // Check if we have any device clause associated with the directive.
7762 const Expr *Device = nullptr;
7763 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7764 Device = C->getDevice();
7766 OMPLexicalScope Scope(*this, S, OMPD_task);
7767 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7770 void CodeGenFunction::EmitOMPGenericLoopDirective(
7771 const OMPGenericLoopDirective &S) {
7772 // Unimplemented, just inline the underlying statement for now.
7773 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7774 // Emit the loop iteration variable.
7775 const Stmt *CS =
7776 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
7777 const auto *ForS = dyn_cast<ForStmt>(CS);
7778 if (ForS && !isa<DeclStmt>(ForS->getInit())) {
7779 OMPPrivateScope LoopScope(CGF);
7780 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7781 (void)LoopScope.Privatize();
7782 CGF.EmitStmt(CS);
7783 LoopScope.restoreMap();
7784 } else {
7785 CGF.EmitStmt(CS);
7788 OMPLexicalScope Scope(*this, S, OMPD_unknown);
7789 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
7792 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
7793 const OMPExecutableDirective &D) {
7794 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
7795 EmitOMPScanDirective(*SD);
7796 return;
7798 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
7799 return;
7800 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
7801 OMPPrivateScope GlobalsScope(CGF);
7802 if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
7803 // Capture global firstprivates to avoid crash.
7804 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
7805 for (const Expr *Ref : C->varlists()) {
7806 const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
7807 if (!DRE)
7808 continue;
7809 const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
7810 if (!VD || VD->hasLocalStorage())
7811 continue;
7812 if (!CGF.LocalDeclMap.count(VD)) {
7813 LValue GlobLVal = CGF.EmitLValue(Ref);
7814 GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
7819 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
7820 (void)GlobalsScope.Privatize();
7821 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
7822 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
7823 } else {
7824 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
7825 for (const Expr *E : LD->counters()) {
7826 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
7827 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
7828 LValue GlobLVal = CGF.EmitLValue(E);
7829 GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
7831 if (isa<OMPCapturedExprDecl>(VD)) {
7832 // Emit only those that were not explicitly referenced in clauses.
7833 if (!CGF.LocalDeclMap.count(VD))
7834 CGF.EmitVarDecl(*VD);
7837 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
7838 if (!C->getNumForLoops())
7839 continue;
7840 for (unsigned I = LD->getLoopsNumber(),
7841 E = C->getLoopNumIterations().size();
7842 I < E; ++I) {
7843 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
7844 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
7845 // Emit only those that were not explicitly referenced in clauses.
7846 if (!CGF.LocalDeclMap.count(VD))
7847 CGF.EmitVarDecl(*VD);
7852 (void)GlobalsScope.Privatize();
7853 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
7856 if (D.getDirectiveKind() == OMPD_atomic ||
7857 D.getDirectiveKind() == OMPD_critical ||
7858 D.getDirectiveKind() == OMPD_section ||
7859 D.getDirectiveKind() == OMPD_master ||
7860 D.getDirectiveKind() == OMPD_masked) {
7861 EmitStmt(D.getAssociatedStmt());
7862 } else {
7863 auto LPCRegion =
7864 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
7865 OMPSimdLexicalScope Scope(*this, D);
7866 CGM.getOpenMPRuntime().emitInlinedDirective(
7867 *this,
7868 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
7869 : D.getDirectiveKind(),
7870 CodeGen);
7872 // Check for outer lastprivate conditional update.
7873 checkForLastprivateConditionalUpdate(*this, D);