//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);
namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct
/// codegen for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
/// Lexical scope for OpenMP parallel construct, that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for OpenMP teams construct, that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }

    // CompoundStmts and DeclStmts are used as lists of PreInit statements and
    // declarations. Since declarations must be visible to the following
    // statements that they initialize, unpack the CompoundStmt they are
    // nested in.
    SmallVector<const Stmt *> PreInitStmts;
    if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
      llvm::append_range(PreInitStmts, PreInitCompound->body());
    else
      PreInitStmts.push_back(PreInits);

    for (const Stmt *S : PreInitStmts) {
      // EmitStmt skips any OMPCapturedExprDecls, but needs to be emitted
      // anyway.
      if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
        for (Decl *I : PreInitDecl->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
        continue;
      }
      CGF.EmitStmt(S);
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
} // namespace
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (isa_and_nonnull<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                  E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
  return TmpAddr;
}
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
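
// OpenMP 'copyin' applies to threadprivate variables, e.g.
//   #pragma omp parallel copyin(tp)
// copies the master thread's value of 'tp' into every other thread's
// threadprivate copy before the body of the parallel region executes.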
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray)
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for_simd:
    case OMPD_taskyield:
    case OMPD_taskgroup:
    case OMPD_cancellation_point:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool TeamsLoopCanBeParallel = false;
    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
      TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for updated lastprivate conditional.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
    }
  }
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
      CGF, S, PrivateDecls);
}

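/// Common lowering for 'parallel'-based directives: outlines the parallel
/// region, emits num_threads/proc_bind/if clause handling, appends any extra
/// bound parameters, and issues the runtime parallel call with the captured
/// variables.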
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads);
}

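/// Returns true if \p VD is marked with an 'omp allocate' declaration that
/// requests something other than the default allocator and therefore needs a
/// runtime allocation.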
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

static void emitOMPCopyinClause(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S) {
  bool Copyins = CGF.EmitOMPCopyinClause(S);
  if (Copyins) {
    // Emit an implicit barrier to synchronize threads and avoid data races on
    // propagation of the master thread's values of threadprivate variables to
    // the local instances of those variables in all other implicit threads.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  }
}

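/// OMPBuilderCBHelpers hook: for a local variable that has an 'omp allocate'
/// declaration with a custom allocator, emits the allocation through the
/// OpenMPIRBuilder alloc/free entry points and registers an EH cleanup for
/// the matching free call.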
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
}

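/// OMPBuilderCBHelpers hook: returns the address to use for a threadprivate
/// variable, keeping the original address when TLS is available and otherwise
/// going through the OpenMPIRBuilder cached-threadprivate runtime call.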
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
}

std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}

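/// The next two helpers emit a region body statement at the insertion point
/// supplied by the OpenMPIRBuilder and branch to a freshly split
/// finalization block, for inlined and outlined regions respectively.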
void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}

void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}

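/// Lowers '#pragma omp parallel'. When LangOpts.OpenMPIRBuilder is enabled the
/// region is built directly through OpenMPIRBuilder::createParallel; otherwise
/// the classic runtime-outlining path (emitCommonOMPParallelDirective) is
/// used.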
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  EmitStmt(S.getIfStmt());
}

namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

  OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
      delete;
  OMPTransformDirectiveScopeRAII &
  operator=(const OMPTransformDirectiveScopeRAII &) = delete;

public:
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    if (!Scope)
      return;
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace

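/// Recursively emits the statements surrounding the innermost loop body of a
/// loop directive: compound statements are walked, loop transformation
/// results and OMPCanonicalLoop wrappers are unwrapped, and loop-variable
/// statements of range-based for loops are emitted, up to \p MaxLevel nested
/// loops.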
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}

void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;

/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);

  return {F, CapStruct.getPointer(ParentCGF)};
}

/// Emit a call to a previously captured closure.
static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
                     llvm::ArrayRef<llvm::Value *> Args) {
  // Append the closure context to the argument.
  SmallVector<llvm::Value *> EffectiveArgs;
  EffectiveArgs.reserve(Args.size() + 1);
  llvm::append_range(EffectiveArgs, Args);
  EffectiveArgs.push_back(Cap.second);

  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
}

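/// Emits \p S and returns the OpenMPIRBuilder CanonicalLoopInfo for the
/// resulting (currently single-level) collapsed loop nest, popping it from
/// OMPLoopNestStack.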
llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive processing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive to the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}

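/// Lowers an OMPCanonicalLoop through the OpenMPIRBuilder: emits the loop
/// init statements, materializes the distance and loop-variable closures, and
/// builds a canonical loop whose body recomputes the user loop variable from
/// the logical induction variable before emitting the original body.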
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else {
    llvm_unreachable("Expected for-stmt or range-based for-stmt");
  }

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  RawAddress CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress();
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.emitRawPointer(*this), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}

2108 void CodeGenFunction::EmitOMPInnerLoop(
2109 const OMPExecutableDirective
&S
, bool RequiresCleanup
, const Expr
*LoopCond
,
2110 const Expr
*IncExpr
,
2111 const llvm::function_ref
<void(CodeGenFunction
&)> BodyGen
,
2112 const llvm::function_ref
<void(CodeGenFunction
&)> PostIncGen
) {
2113 auto LoopExit
= getJumpDestInCurrentScope("omp.inner.for.end");
2115 // Start the loop with a block that tests the condition.
2116 auto CondBlock
= createBasicBlock("omp.inner.for.cond");
2117 EmitBlock(CondBlock
);
2118 const SourceRange R
= S
.getSourceRange();
2120 // If attributes are attached, push to the basic block with them.
2121 const auto &OMPED
= cast
<OMPExecutableDirective
>(S
);
2122 const CapturedStmt
*ICS
= OMPED
.getInnermostCapturedStmt();
2123 const Stmt
*SS
= ICS
->getCapturedStmt();
2124 const AttributedStmt
*AS
= dyn_cast_or_null
<AttributedStmt
>(SS
);
2125 OMPLoopNestStack
.clear();
2127 LoopStack
.push(CondBlock
, CGM
.getContext(), CGM
.getCodeGenOpts(),
2128 AS
->getAttrs(), SourceLocToDebugLoc(R
.getBegin()),
2129 SourceLocToDebugLoc(R
.getEnd()));
2131 LoopStack
.push(CondBlock
, SourceLocToDebugLoc(R
.getBegin()),
2132 SourceLocToDebugLoc(R
.getEnd()));
2134 // If there are any cleanups between here and the loop-exit scope,
2135 // create a block to stage a loop exit along.
2136 llvm::BasicBlock
*ExitBlock
= LoopExit
.getBlock();
2137 if (RequiresCleanup
)
2138 ExitBlock
= createBasicBlock("omp.inner.for.cond.cleanup");
2140 llvm::BasicBlock
*LoopBody
= createBasicBlock("omp.inner.for.body");
2143 EmitBranchOnBoolExpr(LoopCond
, LoopBody
, ExitBlock
, getProfileCount(&S
));
2144 if (ExitBlock
!= LoopExit
.getBlock()) {
2145 EmitBlock(ExitBlock
);
2146 EmitBranchThroughCleanup(LoopExit
);
2149 EmitBlock(LoopBody
);
2150 incrementProfileCounter(&S
);
2152 // Create a block for the increment.
2153 JumpDest Continue
= getJumpDestInCurrentScope("omp.inner.for.inc");
2154 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
2158 // Emit "IV = IV + 1" and a back-edge to the condition block.
2159 EmitBlock(Continue
.getBlock());
2160 EmitIgnoredExpr(IncExpr
);
2162 BreakContinueStack
.pop_back();
2163 EmitBranch(CondBlock
);
2165 // Emit the fall-through block.
2166 EmitBlock(LoopExit
.getBlock());
2169 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective
&D
) {
2170 if (!HaveInsertPoint())
2172 // Emit inits for the linear variables.
2173 bool HasLinears
= false;
2174 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2175 for (const Expr
*Init
: C
->inits()) {
2177 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(Init
)->getDecl());
2178 if (const auto *Ref
=
2179 dyn_cast
<DeclRefExpr
>(VD
->getInit()->IgnoreImpCasts())) {
2180 AutoVarEmission Emission
= EmitAutoVarAlloca(*VD
);
2181 const auto *OrigVD
= cast
<VarDecl
>(Ref
->getDecl());
2182 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
2183 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
2184 VD
->getInit()->getType(), VK_LValue
,
2185 VD
->getInit()->getExprLoc());
2188 MakeAddrLValue(Emission
.getAllocatedAddress(), VD
->getType()),
2189 /*capturedByInit=*/false);
2190 EmitAutoVarCleanups(Emission
);
2195 // Emit the linear steps for the linear clauses.
2196 // If a step is not constant, it is pre-calculated before the loop.
2197 if (const auto *CS
= cast_or_null
<BinaryOperator
>(C
->getCalcStep()))
2198 if (const auto *SaveRef
= cast
<DeclRefExpr
>(CS
->getLHS())) {
2199 EmitVarDecl(*cast
<VarDecl
>(SaveRef
->getDecl()));
2200 // Emit calculation of the linear step.
2201 EmitIgnoredExpr(CS
);
2207 void CodeGenFunction::EmitOMPLinearClauseFinal(
2208 const OMPLoopDirective
&D
,
2209 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
2210 if (!HaveInsertPoint())
2212 llvm::BasicBlock
*DoneBB
= nullptr;
2213 // Emit the final values of the linear variables.
2214 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2215 auto IC
= C
->varlist_begin();
2216 for (const Expr
*F
: C
->finals()) {
2218 if (llvm::Value
*Cond
= CondGen(*this)) {
2219 // If the first post-update expression is found, emit conditional
2220 // block if it was requested.
2221 llvm::BasicBlock
*ThenBB
= createBasicBlock(".omp.linear.pu");
2222 DoneBB
= createBasicBlock(".omp.linear.pu.done");
2223 Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
2227 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IC
)->getDecl());
2228 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
2229 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
2230 (*IC
)->getType(), VK_LValue
, (*IC
)->getExprLoc());
2231 Address OrigAddr
= EmitLValue(&DRE
).getAddress();
2232 CodeGenFunction::OMPPrivateScope
VarScope(*this);
2233 VarScope
.addPrivate(OrigVD
, OrigAddr
);
2234 (void)VarScope
.Privatize();
2238 if (const Expr
*PostUpdate
= C
->getPostUpdateExpr())
2239 EmitIgnoredExpr(PostUpdate
);
2242 EmitBlock(DoneBB
, /*IsFinished=*/true);
2245 static void emitAlignedClause(CodeGenFunction
&CGF
,
2246 const OMPExecutableDirective
&D
) {
2247 if (!CGF
.HaveInsertPoint())
2249 for (const auto *Clause
: D
.getClausesOfKind
<OMPAlignedClause
>()) {
2250 llvm::APInt
ClauseAlignment(64, 0);
2251 if (const Expr
*AlignmentExpr
= Clause
->getAlignment()) {
2253 cast
<llvm::ConstantInt
>(CGF
.EmitScalarExpr(AlignmentExpr
));
2254 ClauseAlignment
= AlignmentCI
->getValue();
2256 for (const Expr
*E
: Clause
->varlists()) {
2257 llvm::APInt
Alignment(ClauseAlignment
);
2258 if (Alignment
== 0) {
2259 // OpenMP [2.8.1, Description]
2260 // If no optional parameter is specified, implementation-defined default
2261 // alignments for SIMD instructions on the target platforms are assumed.
2264 .toCharUnitsFromBits(CGF
.getContext().getOpenMPDefaultSimdAlign(
2265 E
->getType()->getPointeeType()))
2268 assert((Alignment
== 0 || Alignment
.isPowerOf2()) &&
2269 "alignment is not power of 2");
2270 if (Alignment
!= 0) {
2271 llvm::Value
*PtrValue
= CGF
.EmitScalarExpr(E
);
2272 CGF
.emitAlignmentAssumption(
2273 PtrValue
, E
, /*No second loc needed*/ SourceLocation(),
2274 llvm::ConstantInt::get(CGF
.getLLVMContext(), Alignment
));
2280 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2281 const OMPLoopDirective
&S
, CodeGenFunction::OMPPrivateScope
&LoopScope
) {
2282 if (!HaveInsertPoint())
2284 auto I
= S
.private_counters().begin();
2285 for (const Expr
*E
: S
.counters()) {
2286 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2287 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*I
)->getDecl());
2288 // Emit var without initialization.
2289 AutoVarEmission VarEmission
= EmitAutoVarAlloca(*PrivateVD
);
2290 EmitAutoVarCleanups(VarEmission
);
2291 LocalDeclMap
.erase(PrivateVD
);
2292 (void)LoopScope
.addPrivate(VD
, VarEmission
.getAllocatedAddress());
2293 if (LocalDeclMap
.count(VD
) || CapturedStmtInfo
->lookup(VD
) ||
2294 VD
->hasGlobalStorage()) {
2295 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(VD
),
2296 LocalDeclMap
.count(VD
) || CapturedStmtInfo
->lookup(VD
),
2297 E
->getType(), VK_LValue
, E
->getExprLoc());
2298 (void)LoopScope
.addPrivate(PrivateVD
, EmitLValue(&DRE
).getAddress());
2300 (void)LoopScope
.addPrivate(PrivateVD
, VarEmission
.getAllocatedAddress());
2304 // Privatize extra loop counters used in loops for ordered(n) clauses.
2305 for (const auto *C
: S
.getClausesOfKind
<OMPOrderedClause
>()) {
2306 if (!C
->getNumForLoops())
2308 for (unsigned I
= S
.getLoopsNumber(), E
= C
->getLoopNumIterations().size();
2310 const auto *DRE
= cast
<DeclRefExpr
>(C
->getLoopCounter(I
));
2311 const auto *VD
= cast
<VarDecl
>(DRE
->getDecl());
2312 // Override only those variables that can be captured to avoid re-emission
2313 // of the variables declared within the loops.
2314 if (DRE
->refersToEnclosingVariableOrCapture()) {
2315 (void)LoopScope
.addPrivate(
2316 VD
, CreateMemTemp(DRE
->getType(), VD
->getName()));
2322 static void emitPreCond(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2323 const Expr
*Cond
, llvm::BasicBlock
*TrueBlock
,
2324 llvm::BasicBlock
*FalseBlock
, uint64_t TrueCount
) {
2325 if (!CGF
.HaveInsertPoint())
2328 CodeGenFunction::OMPPrivateScope
PreCondScope(CGF
);
2329 CGF
.EmitOMPPrivateLoopCounters(S
, PreCondScope
);
2330 (void)PreCondScope
.Privatize();
2331 // Get initial values of real counters.
2332 for (const Expr
*I
: S
.inits()) {
2333 CGF
.EmitIgnoredExpr(I
);
2336 // Create temp loop control variables with their init values to support
2337 // non-rectangular loops.
2338 CodeGenFunction::OMPMapVars PreCondVars
;
2339 for (const Expr
*E
: S
.dependent_counters()) {
2342 assert(!E
->getType().getNonReferenceType()->isRecordType() &&
2343 "dependent counter must not be an iterator.");
2344 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2345 Address CounterAddr
=
2346 CGF
.CreateMemTemp(VD
->getType().getNonReferenceType());
2347 (void)PreCondVars
.setVarAddr(CGF
, VD
, CounterAddr
);
2349 (void)PreCondVars
.apply(CGF
);
2350 for (const Expr
*E
: S
.dependent_inits()) {
2353 CGF
.EmitIgnoredExpr(E
);
2355 // Check that loop is executed at least one time.
2356 CGF
.EmitBranchOnBoolExpr(Cond
, TrueBlock
, FalseBlock
, TrueCount
);
2357 PreCondVars
.restore(CGF
);
2360 void CodeGenFunction::EmitOMPLinearClause(
2361 const OMPLoopDirective
&D
, CodeGenFunction::OMPPrivateScope
&PrivateScope
) {
2362 if (!HaveInsertPoint())
2364 llvm::DenseSet
<const VarDecl
*> SIMDLCVs
;
2365 if (isOpenMPSimdDirective(D
.getDirectiveKind())) {
2366 const auto *LoopDirective
= cast
<OMPLoopDirective
>(&D
);
2367 for (const Expr
*C
: LoopDirective
->counters()) {
2369 cast
<VarDecl
>(cast
<DeclRefExpr
>(C
)->getDecl())->getCanonicalDecl());
2372 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2373 auto CurPrivate
= C
->privates().begin();
2374 for (const Expr
*E
: C
->varlists()) {
2375 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2376 const auto *PrivateVD
=
2377 cast
<VarDecl
>(cast
<DeclRefExpr
>(*CurPrivate
)->getDecl());
2378 if (!SIMDLCVs
.count(VD
->getCanonicalDecl())) {
2379 // Emit private VarDecl with copy init.
2380 EmitVarDecl(*PrivateVD
);
2382 PrivateScope
.addPrivate(VD
, GetAddrOfLocalVar(PrivateVD
));
2383 assert(IsRegistered
&& "linear var already registered as private");
2384 // Silence the warning about unused variable.
2387 EmitVarDecl(*PrivateVD
);
2394 static void emitSimdlenSafelenClause(CodeGenFunction
&CGF
,
2395 const OMPExecutableDirective
&D
) {
2396 if (!CGF
.HaveInsertPoint())
2398 if (const auto *C
= D
.getSingleClause
<OMPSimdlenClause
>()) {
2399 RValue Len
= CGF
.EmitAnyExpr(C
->getSimdlen(), AggValueSlot::ignored(),
2400 /*ignoreResult=*/true);
2401 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2402 CGF
.LoopStack
.setVectorizeWidth(Val
->getZExtValue());
2403 // In presence of finite 'safelen', it may be unsafe to mark all
2404 // the memory instructions parallel, because loop-carried
2405 // dependences of 'safelen' iterations are possible.
2406 CGF
.LoopStack
.setParallel(!D
.getSingleClause
<OMPSafelenClause
>());
2407 } else if (const auto *C
= D
.getSingleClause
<OMPSafelenClause
>()) {
2408 RValue Len
= CGF
.EmitAnyExpr(C
->getSafelen(), AggValueSlot::ignored(),
2409 /*ignoreResult=*/true);
2410 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2411 CGF
.LoopStack
.setVectorizeWidth(Val
->getZExtValue());
2412 // In presence of finite 'safelen', it may be unsafe to mark all
2413 // the memory instructions parallel, because loop-carried
2414 // dependences of 'safelen' iterations are possible.
2415 CGF
.LoopStack
.setParallel(/*Enable=*/false);
2419 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
&D
) {
2420 // Walk clauses and process safelen/lastprivate.
2421 LoopStack
.setParallel(/*Enable=*/true);
2422 LoopStack
.setVectorizeEnable();
2423 emitSimdlenSafelenClause(*this, D
);
2424 if (const auto *C
= D
.getSingleClause
<OMPOrderClause
>())
2425 if (C
->getKind() == OMPC_ORDER_concurrent
)
2426 LoopStack
.setParallel(/*Enable=*/true);
2427 if ((D
.getDirectiveKind() == OMPD_simd
||
2428 (getLangOpts().OpenMPSimd
&&
2429 isOpenMPSimdDirective(D
.getDirectiveKind()))) &&
2430 llvm::any_of(D
.getClausesOfKind
<OMPReductionClause
>(),
2431 [](const OMPReductionClause
*C
) {
2432 return C
->getModifier() == OMPC_REDUCTION_inscan
;
2434 // Disable parallel access in case of prefix sum.
2435 LoopStack
.setParallel(/*Enable=*/false);
2438 void CodeGenFunction::EmitOMPSimdFinal(
2439 const OMPLoopDirective
&D
,
2440 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
2441 if (!HaveInsertPoint())
2443 llvm::BasicBlock
*DoneBB
= nullptr;
2444 auto IC
= D
.counters().begin();
2445 auto IPC
= D
.private_counters().begin();
2446 for (const Expr
*F
: D
.finals()) {
2447 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>((*IC
))->getDecl());
2448 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>((*IPC
))->getDecl());
2449 const auto *CED
= dyn_cast
<OMPCapturedExprDecl
>(OrigVD
);
2450 if (LocalDeclMap
.count(OrigVD
) || CapturedStmtInfo
->lookup(OrigVD
) ||
2451 OrigVD
->hasGlobalStorage() || CED
) {
2453 if (llvm::Value
*Cond
= CondGen(*this)) {
2454 // If the first post-update expression is found, emit conditional
2455 // block if it was requested.
2456 llvm::BasicBlock
*ThenBB
= createBasicBlock(".omp.final.then");
2457 DoneBB
= createBasicBlock(".omp.final.done");
2458 Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
2462 Address OrigAddr
= Address::invalid();
2464 OrigAddr
= EmitLValue(CED
->getInit()->IgnoreImpCasts()).getAddress();
2466 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(PrivateVD
),
2467 /*RefersToEnclosingVariableOrCapture=*/false,
2468 (*IPC
)->getType(), VK_LValue
, (*IPC
)->getExprLoc());
2469 OrigAddr
= EmitLValue(&DRE
).getAddress();
2471 OMPPrivateScope
VarScope(*this);
2472 VarScope
.addPrivate(OrigVD
, OrigAddr
);
2473 (void)VarScope
.Privatize();
2480 EmitBlock(DoneBB
, /*IsFinished=*/true);
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

2498 static void emitCommonSimdLoop(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2499 const RegionCodeGenTy
&SimdInitGen
,
2500 const RegionCodeGenTy
&BodyCodeGen
) {
2501 auto &&ThenGen
= [&S
, &SimdInitGen
, &BodyCodeGen
](CodeGenFunction
&CGF
,
2502 PrePostActionTy
&) {
2503 CGOpenMPRuntime::NontemporalDeclsRAII
NontemporalsRegion(CGF
.CGM
, S
);
2504 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
2509 auto &&ElseGen
= [&BodyCodeGen
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2510 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
2511 CGF
.LoopStack
.setVectorizeEnable(/*Enable=*/false);
2515 const Expr
*IfCond
= nullptr;
2516 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
2517 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
2518 if (CGF
.getLangOpts().OpenMP
>= 50 &&
2519 (C
->getNameModifier() == OMPD_unknown
||
2520 C
->getNameModifier() == OMPD_simd
)) {
2521 IfCond
= C
->getCondition();
2527 CGF
.CGM
.getOpenMPRuntime().emitIfClause(CGF
, IfCond
, ThenGen
, ElseGen
);
2529 RegionCodeGenTy
ThenRCG(ThenGen
);
2534 static void emitOMPSimdRegion(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2535 PrePostActionTy
&Action
) {
2537 assert(isOpenMPSimdDirective(S
.getDirectiveKind()) &&
2538 "Expected simd directive");
2539 OMPLoopScope
PreInitScope(CGF
, S
);
2541 // for (IV in 0..LastIteration) BODY;
2542 // <Final counter/linear vars updates>;
2545 if (isOpenMPDistributeDirective(S
.getDirectiveKind()) ||
2546 isOpenMPWorksharingDirective(S
.getDirectiveKind()) ||
2547 isOpenMPTaskLoopDirective(S
.getDirectiveKind())) {
2548 (void)EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(S
.getLowerBoundVariable()));
2549 (void)EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(S
.getUpperBoundVariable()));
2552 // Emit: if (PreCond) - begin.
2553 // If the condition constant folds and can be elided, avoid emitting the
2556 llvm::BasicBlock
*ContBlock
= nullptr;
2557 if (CGF
.ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
2561 llvm::BasicBlock
*ThenBlock
= CGF
.createBasicBlock("simd.if.then");
2562 ContBlock
= CGF
.createBasicBlock("simd.if.end");
2563 emitPreCond(CGF
, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
2564 CGF
.getProfileCount(&S
));
2565 CGF
.EmitBlock(ThenBlock
);
2566 CGF
.incrementProfileCounter(&S
);
2569 // Emit the loop iteration variable.
2570 const Expr
*IVExpr
= S
.getIterationVariable();
2571 const auto *IVDecl
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IVExpr
)->getDecl());
2572 CGF
.EmitVarDecl(*IVDecl
);
2573 CGF
.EmitIgnoredExpr(S
.getInit());
2575 // Emit the iterations count variable.
2576 // If it is not a variable, Sema decided to calculate iterations count on
2577 // each iteration (e.g., it is foldable into a constant).
2578 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
2579 CGF
.EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
2580 // Emit calculation of the iterations count.
2581 CGF
.EmitIgnoredExpr(S
.getCalcLastIteration());
2584 emitAlignedClause(CGF
, S
);
2585 (void)CGF
.EmitOMPLinearClauseInit(S
);
2587 CodeGenFunction::OMPPrivateScope
LoopScope(CGF
);
2588 CGF
.EmitOMPPrivateClause(S
, LoopScope
);
2589 CGF
.EmitOMPPrivateLoopCounters(S
, LoopScope
);
2590 CGF
.EmitOMPLinearClause(S
, LoopScope
);
2591 CGF
.EmitOMPReductionClauseInit(S
, LoopScope
);
2592 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(
2593 CGF
, S
, CGF
.EmitLValue(S
.getIterationVariable()));
2594 bool HasLastprivateClause
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
2595 (void)LoopScope
.Privatize();
2596 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
2597 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
2601 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2602 CGF
.EmitOMPSimdInit(S
);
2604 [&S
, &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2605 CGF
.EmitOMPInnerLoop(
2606 S
, LoopScope
.requiresCleanups(), S
.getCond(), S
.getInc(),
2607 [&S
](CodeGenFunction
&CGF
) {
2608 emitOMPLoopBodyWithStopPoint(CGF
, S
,
2609 CodeGenFunction::JumpDest());
2611 [](CodeGenFunction
&) {});
2613 CGF
.EmitOMPSimdFinal(S
, [](CodeGenFunction
&) { return nullptr; });
2614 // Emit final copy of the lastprivate variables at the end of loops.
2615 if (HasLastprivateClause
)
2616 CGF
.EmitOMPLastprivateClauseFinal(S
, /*NoFinals=*/true);
2617 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_simd
);
2618 emitPostUpdateForReductionClause(CGF
, S
,
2619 [](CodeGenFunction
&) { return nullptr; });
2620 LoopScope
.restoreMap();
2621 CGF
.EmitOMPLinearClauseFinal(S
, [](CodeGenFunction
&) { return nullptr; });
2623 // Emit: if (PreCond) - end.
2625 CGF
.EmitBranch(ContBlock
);
2626 CGF
.EmitBlock(ContBlock
, true);
2630 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective
&S
) {
2631 // Check for unsupported clauses
2632 for (OMPClause
*C
: S
.clauses()) {
2633 // Currently only order, simdlen and safelen clauses are supported
2634 if (!(isa
<OMPSimdlenClause
>(C
) || isa
<OMPSafelenClause
>(C
) ||
2635 isa
<OMPOrderClause
>(C
) || isa
<OMPAlignedClause
>(C
)))
2639 // Check if we have a statement with the ordered directive.
2640 // Visit the statement hierarchy to find a compound statement
2641 // with a ordered directive in it.
2642 if (const auto *CanonLoop
= dyn_cast
<OMPCanonicalLoop
>(S
.getRawStmt())) {
2643 if (const Stmt
*SyntacticalLoop
= CanonLoop
->getLoopStmt()) {
2644 for (const Stmt
*SubStmt
: SyntacticalLoop
->children()) {
2647 if (const CompoundStmt
*CS
= dyn_cast
<CompoundStmt
>(SubStmt
)) {
2648 for (const Stmt
*CSSubStmt
: CS
->children()) {
2651 if (isa
<OMPOrderedDirective
>(CSSubStmt
)) {
2661 static llvm::MapVector
<llvm::Value
*, llvm::Value
*>
2662 GetAlignedMapping(const OMPSimdDirective
&S
, CodeGenFunction
&CGF
) {
2663 llvm::MapVector
<llvm::Value
*, llvm::Value
*> AlignedVars
;
2664 for (const auto *Clause
: S
.getClausesOfKind
<OMPAlignedClause
>()) {
2665 llvm::APInt
ClauseAlignment(64, 0);
2666 if (const Expr
*AlignmentExpr
= Clause
->getAlignment()) {
2668 cast
<llvm::ConstantInt
>(CGF
.EmitScalarExpr(AlignmentExpr
));
2669 ClauseAlignment
= AlignmentCI
->getValue();
2671 for (const Expr
*E
: Clause
->varlists()) {
2672 llvm::APInt
Alignment(ClauseAlignment
);
2673 if (Alignment
== 0) {
2674 // OpenMP [2.8.1, Description]
2675 // If no optional parameter is specified, implementation-defined default
2676 // alignments for SIMD instructions on the target platforms are assumed.
2679 .toCharUnitsFromBits(CGF
.getContext().getOpenMPDefaultSimdAlign(
2680 E
->getType()->getPointeeType()))
2683 assert((Alignment
== 0 || Alignment
.isPowerOf2()) &&
2684 "alignment is not power of 2");
2685 llvm::Value
*PtrValue
= CGF
.EmitScalarExpr(E
);
2686 AlignedVars
[PtrValue
] = CGF
.Builder
.getInt64(Alignment
.getSExtValue());
2692 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective
&S
) {
2693 bool UseOMPIRBuilder
=
2694 CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
);
2695 if (UseOMPIRBuilder
) {
2696 auto &&CodeGenIRBuilder
= [this, &S
, UseOMPIRBuilder
](CodeGenFunction
&CGF
,
2697 PrePostActionTy
&) {
2698 // Use the OpenMPIRBuilder if enabled.
2699 if (UseOMPIRBuilder
) {
2700 llvm::MapVector
<llvm::Value
*, llvm::Value
*> AlignedVars
=
2701 GetAlignedMapping(S
, CGF
);
2702 // Emit the associated statement and get its loop representation.
2703 const Stmt
*Inner
= S
.getRawStmt();
2704 llvm::CanonicalLoopInfo
*CLI
=
2705 EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
2707 llvm::OpenMPIRBuilder
&OMPBuilder
=
2708 CGM
.getOpenMPRuntime().getOMPBuilder();
2709 // Add SIMD specific metadata
2710 llvm::ConstantInt
*Simdlen
= nullptr;
2711 if (const auto *C
= S
.getSingleClause
<OMPSimdlenClause
>()) {
2713 this->EmitAnyExpr(C
->getSimdlen(), AggValueSlot::ignored(),
2714 /*ignoreResult=*/true);
2715 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2718 llvm::ConstantInt
*Safelen
= nullptr;
2719 if (const auto *C
= S
.getSingleClause
<OMPSafelenClause
>()) {
2721 this->EmitAnyExpr(C
->getSafelen(), AggValueSlot::ignored(),
2722 /*ignoreResult=*/true);
2723 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2726 llvm::omp::OrderKind Order
= llvm::omp::OrderKind::OMP_ORDER_unknown
;
2727 if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>()) {
2728 if (C
->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent
) {
2729 Order
= llvm::omp::OrderKind::OMP_ORDER_concurrent
;
2732 // Add simd metadata to the collapsed loop. Do not generate
2733 // another loop for if clause. Support for if clause is done earlier.
2734 OMPBuilder
.applySimd(CLI
, AlignedVars
,
2735 /*IfCond*/ nullptr, Order
, Simdlen
, Safelen
);
2741 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
2742 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
2743 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
,
2749 ParentLoopDirectiveForScanRegion
ScanRegion(*this, S
);
2750 OMPFirstScanLoop
= true;
2751 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
2752 emitOMPSimdRegion(CGF
, S
, Action
);
2756 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
2757 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
2758 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
2760 // Check for outer lastprivate conditional update.
2761 checkForLastprivateConditionalUpdate(*this, S
);
void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPInterchangeDirective(
    const OMPInterchangeDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

2783 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective
&S
) {
2784 bool UseOMPIRBuilder
= CGM
.getLangOpts().OpenMPIRBuilder
;
2786 if (UseOMPIRBuilder
) {
2787 auto DL
= SourceLocToDebugLoc(S
.getBeginLoc());
2788 const Stmt
*Inner
= S
.getRawStmt();
2790 // Consume nested loop. Clear the entire remaining loop stack because a
2791 // fully unrolled loop is non-transformable. For partial unrolling the
2792 // generated outer loop is pushed back to the stack.
2793 llvm::CanonicalLoopInfo
*CLI
= EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
2794 OMPLoopNestStack
.clear();
2796 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
2798 bool NeedsUnrolledCLI
= ExpectedOMPLoopDepth
>= 1;
2799 llvm::CanonicalLoopInfo
*UnrolledCLI
= nullptr;
2801 if (S
.hasClausesOfKind
<OMPFullClause
>()) {
2802 assert(ExpectedOMPLoopDepth
== 0);
2803 OMPBuilder
.unrollLoopFull(DL
, CLI
);
2804 } else if (auto *PartialClause
= S
.getSingleClause
<OMPPartialClause
>()) {
2805 uint64_t Factor
= 0;
2806 if (Expr
*FactorExpr
= PartialClause
->getFactor()) {
2807 Factor
= FactorExpr
->EvaluateKnownConstInt(getContext()).getZExtValue();
2808 assert(Factor
>= 1 && "Only positive factors are valid");
2810 OMPBuilder
.unrollLoopPartial(DL
, CLI
, Factor
,
2811 NeedsUnrolledCLI
? &UnrolledCLI
: nullptr);
2813 OMPBuilder
.unrollLoopHeuristic(DL
, CLI
);
2816 assert((!NeedsUnrolledCLI
|| UnrolledCLI
) &&
2817 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2819 OMPLoopNestStack
.push_back(UnrolledCLI
);
2824 // This function is only called if the unrolled loop is not consumed by any
2825 // other loop-associated construct. Such a loop-associated construct will have
2826 // used the transformed AST.
2828 // Set the unroll metadata for the next emitted loop.
2829 LoopStack
.setUnrollState(LoopAttributes::Enable
);
2831 if (S
.hasClausesOfKind
<OMPFullClause
>()) {
2832 LoopStack
.setUnrollState(LoopAttributes::Full
);
2833 } else if (auto *PartialClause
= S
.getSingleClause
<OMPPartialClause
>()) {
2834 if (Expr
*FactorExpr
= PartialClause
->getFactor()) {
2836 FactorExpr
->EvaluateKnownConstInt(getContext()).getZExtValue();
2837 assert(Factor
>= 1 && "Only positive factors are valid");
2838 LoopStack
.setUnrollCount(Factor
);
2842 EmitStmt(S
.getAssociatedStmt());
2845 void CodeGenFunction::EmitOMPOuterLoop(
2846 bool DynamicOrOrdered
, bool IsMonotonic
, const OMPLoopDirective
&S
,
2847 CodeGenFunction::OMPPrivateScope
&LoopScope
,
2848 const CodeGenFunction::OMPLoopArguments
&LoopArgs
,
2849 const CodeGenFunction::CodeGenLoopTy
&CodeGenLoop
,
2850 const CodeGenFunction::CodeGenOrderedTy
&CodeGenOrdered
) {
2851 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
2853 const Expr
*IVExpr
= S
.getIterationVariable();
2854 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
2855 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
2857 JumpDest LoopExit
= getJumpDestInCurrentScope("omp.dispatch.end");
2859 // Start the loop with a block that tests the condition.
2860 llvm::BasicBlock
*CondBlock
= createBasicBlock("omp.dispatch.cond");
2861 EmitBlock(CondBlock
);
2862 const SourceRange R
= S
.getSourceRange();
2863 OMPLoopNestStack
.clear();
2864 LoopStack
.push(CondBlock
, SourceLocToDebugLoc(R
.getBegin()),
2865 SourceLocToDebugLoc(R
.getEnd()));
2867 llvm::Value
*BoolCondVal
= nullptr;
2868 if (!DynamicOrOrdered
) {
2869 // UB = min(UB, GlobalUB) or
2870 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2871 // 'distribute parallel for')
2872 EmitIgnoredExpr(LoopArgs
.EUB
);
2874 EmitIgnoredExpr(LoopArgs
.Init
);
2876 BoolCondVal
= EvaluateExprAsBool(LoopArgs
.Cond
);
2879 RT
.emitForNext(*this, S
.getBeginLoc(), IVSize
, IVSigned
, LoopArgs
.IL
,
2880 LoopArgs
.LB
, LoopArgs
.UB
, LoopArgs
.ST
);
2883 // If there are any cleanups between here and the loop-exit scope,
2884 // create a block to stage a loop exit along.
2885 llvm::BasicBlock
*ExitBlock
= LoopExit
.getBlock();
2886 if (LoopScope
.requiresCleanups())
2887 ExitBlock
= createBasicBlock("omp.dispatch.cleanup");
2889 llvm::BasicBlock
*LoopBody
= createBasicBlock("omp.dispatch.body");
2890 Builder
.CreateCondBr(BoolCondVal
, LoopBody
, ExitBlock
);
2891 if (ExitBlock
!= LoopExit
.getBlock()) {
2892 EmitBlock(ExitBlock
);
2893 EmitBranchThroughCleanup(LoopExit
);
2895 EmitBlock(LoopBody
);
2897 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2898 // LB for loop condition and emitted it above).
2899 if (DynamicOrOrdered
)
2900 EmitIgnoredExpr(LoopArgs
.Init
);
2902 // Create a block for the increment.
2903 JumpDest Continue
= getJumpDestInCurrentScope("omp.dispatch.inc");
2904 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
2908 [&S
, IsMonotonic
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2909 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2910 // with dynamic/guided scheduling and without ordered clause.
2911 if (!isOpenMPSimdDirective(S
.getDirectiveKind())) {
2912 CGF
.LoopStack
.setParallel(!IsMonotonic
);
2913 if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>())
2914 if (C
->getKind() == OMPC_ORDER_concurrent
)
2915 CGF
.LoopStack
.setParallel(/*Enable=*/true);
2917 CGF
.EmitOMPSimdInit(S
);
2920 [&S
, &LoopArgs
, LoopExit
, &CodeGenLoop
, IVSize
, IVSigned
, &CodeGenOrdered
,
2921 &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2922 SourceLocation Loc
= S
.getBeginLoc();
2923 // when 'distribute' is not combined with a 'for':
2924 // while (idx <= UB) { BODY; ++idx; }
2925 // when 'distribute' is combined with a 'for'
2926 // (e.g. 'distribute parallel for')
2927 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2928 CGF
.EmitOMPInnerLoop(
2929 S
, LoopScope
.requiresCleanups(), LoopArgs
.Cond
, LoopArgs
.IncExpr
,
2930 [&S
, LoopExit
, &CodeGenLoop
](CodeGenFunction
&CGF
) {
2931 CodeGenLoop(CGF
, S
, LoopExit
);
2933 [IVSize
, IVSigned
, Loc
, &CodeGenOrdered
](CodeGenFunction
&CGF
) {
2934 CodeGenOrdered(CGF
, Loc
, IVSize
, IVSigned
);
2938 EmitBlock(Continue
.getBlock());
2939 BreakContinueStack
.pop_back();
2940 if (!DynamicOrOrdered
) {
2941 // Emit "LB = LB + Stride", "UB = UB + Stride".
2942 EmitIgnoredExpr(LoopArgs
.NextLB
);
2943 EmitIgnoredExpr(LoopArgs
.NextUB
);
2946 EmitBranch(CondBlock
);
2947 OMPLoopNestStack
.clear();
2949 // Emit the fall-through block.
2950 EmitBlock(LoopExit
.getBlock());
2952 // Tell the runtime we are done.
2953 auto &&CodeGen
= [DynamicOrOrdered
, &S
, &LoopArgs
](CodeGenFunction
&CGF
) {
2954 if (!DynamicOrOrdered
)
2955 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
2958 OMPCancelStack
.emitExit(*this, S
.getDirectiveKind(), CodeGen
);
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                            LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // __kmpc_dispatch_init();
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  // __kmpc_dispatch_deinit();
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
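  // Illustrative sketch (simplified; the exact runtime entry points and
  // argument lists are chosen by CGOpenMPRuntime, and the loop/body names
  // below are arbitrary): for a directive such as
  //
  //   #pragma omp for schedule(dynamic, 4)
  //   for (int i = 0; i < N; ++i) Body(i);
  //
  // the dynamic path initializes dispatching once and then repeatedly asks
  // the runtime for the next chunk, roughly:
  //
  //   __kmpc_dispatch_init_4(...);            // emitForDispatchInit below
  //   while (__kmpc_dispatch_next_4(...)) {   // emitted via EmitOMPOuterLoop
  //     for (IV = LB; IV <= UB; ++IV) Body(IV);
  //   }
  //   __kmpc_dispatch_deinit(...);            // emitForDispatchDeinit below
  //
  // whereas the static path performs a single emitForStaticInit call and the
  // outer loop then strides LB/UB by ST until the iteration space is used up.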

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  OuterLoopArgs.DKind = LoopArgs.DKind;
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
  if (DynamicOrOrdered) {
    RT.emitForDispatchDeinit(*this, S.getBeginLoc());
  }
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // for combined 'distribute' and 'for' the increment expression of distribute
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // this routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();
  OuterLoopArgs.DKind = OMPD_distribute;

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
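
// Sketch of the effect (illustrative numbers only): if the enclosing
// 'distribute' handed this team the chunk [PrevLB, PrevUB] = [128, 255],
// the LB/UB emitted above become 128 and 255, so the inner 'for' of a
// 'distribute parallel for' workshares exactly those iterations among the
// team's threads instead of re-dividing the full 0..LastIteration space.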

/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // when implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}
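
// For a dispatch ('for') schedule inside a combined construct, e.g.
// '#pragma omp distribute parallel for schedule(dynamic)', the bounds passed
// to the dispatch initialization are therefore the values just loaded from
// the team's distribute chunk, not the 0..LastIteration constants that
// emitDispatchForLoopBounds produces for a standalone worksharing loop.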

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
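
// Taken together (sketch only; the loop body name is arbitrary), a combined
// construct like
//
//   #pragma omp distribute parallel for
//   for (int i = 0; i < N; ++i) Body(i);
//
// is lowered in two layers: EmitOMPDistributeLoop/EmitOMPDistributeOuterLoop
// statically hand each team a distribute chunk, and the callback above opens
// a parallel region whose EmitOMPWorksharingLoop schedules that chunk across
// the team's threads using the Prev* bounds.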

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
3313 const auto *IVExpr
= cast
<DeclRefExpr
>(S
.getIterationVariable());
3314 const auto *IVDecl
= cast
<VarDecl
>(IVExpr
->getDecl());
3315 EmitVarDecl(*IVDecl
);
3317 // Emit the iterations count variable.
3318 // If it is not a variable, Sema decided to calculate iterations count on each
3319 // iteration (e.g., it is foldable into a constant).
3320 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
3321 EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
3322 // Emit calculation of the iterations count.
3323 EmitIgnoredExpr(S
.getCalcLastIteration());
3326 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
3328 bool HasLastprivateClause
;
3329 // Check pre-condition.
3331 OMPLoopScope
PreInitScope(*this, S
);
3332 // Skip the entire loop if we don't meet the precondition.
3333 // If the condition constant folds and can be elided, avoid emitting the
3336 llvm::BasicBlock
*ContBlock
= nullptr;
3337 if (ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
3341 llvm::BasicBlock
*ThenBlock
= createBasicBlock("omp.precond.then");
3342 ContBlock
= createBasicBlock("omp.precond.end");
3343 emitPreCond(*this, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
3344 getProfileCount(&S
));
3345 EmitBlock(ThenBlock
);
3346 incrementProfileCounter(&S
);
3349 RunCleanupsScope
DoacrossCleanupScope(*this);
3350 bool Ordered
= false;
3351 if (const auto *OrderedClause
= S
.getSingleClause
<OMPOrderedClause
>()) {
3352 if (OrderedClause
->getNumForLoops())
3353 RT
.emitDoacrossInit(*this, S
, OrderedClause
->getLoopNumIterations());
3358 llvm::DenseSet
<const Expr
*> EmittedFinals
;
3359 emitAlignedClause(*this, S
);
3360 bool HasLinears
= EmitOMPLinearClauseInit(S
);
3361 // Emit helper vars inits.
3363 std::pair
<LValue
, LValue
> Bounds
= CodeGenLoopBounds(*this, S
);
3364 LValue LB
= Bounds
.first
;
3365 LValue UB
= Bounds
.second
;
3367 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getStrideVariable()));
3369 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()));
3371 // Emit 'then' code.
3373 OMPPrivateScope
LoopScope(*this);
3374 if (EmitOMPFirstprivateClause(S
, LoopScope
) || HasLinears
) {
3375 // Emit implicit barrier to synchronize threads and avoid data races on
3376 // initialization of firstprivate variables and post-update of
3377 // lastprivate variables.
3378 CGM
.getOpenMPRuntime().emitBarrierCall(
3379 *this, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
3380 /*ForceSimpleCall=*/true);
3382 EmitOMPPrivateClause(S
, LoopScope
);
3383 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(
3384 *this, S
, EmitLValue(S
.getIterationVariable()));
3385 HasLastprivateClause
= EmitOMPLastprivateClauseInit(S
, LoopScope
);
3386 EmitOMPReductionClauseInit(S
, LoopScope
);
3387 EmitOMPPrivateLoopCounters(S
, LoopScope
);
3388 EmitOMPLinearClause(S
, LoopScope
);
3389 (void)LoopScope
.Privatize();
3390 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
3391 CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S
);
3393 // Detect the loop schedule kind and chunk.
3394 const Expr
*ChunkExpr
= nullptr;
3395 OpenMPScheduleTy ScheduleKind
;
3396 if (const auto *C
= S
.getSingleClause
<OMPScheduleClause
>()) {
3397 ScheduleKind
.Schedule
= C
->getScheduleKind();
3398 ScheduleKind
.M1
= C
->getFirstScheduleModifier();
3399 ScheduleKind
.M2
= C
->getSecondScheduleModifier();
3400 ChunkExpr
= C
->getChunkSize();
3402 // Default behaviour for schedule clause.
3403 CGM
.getOpenMPRuntime().getDefaultScheduleAndChunk(
3404 *this, S
, ScheduleKind
.Schedule
, ChunkExpr
);
3406 bool HasChunkSizeOne
= false;
3407 llvm::Value
*Chunk
= nullptr;
3409 Chunk
= EmitScalarExpr(ChunkExpr
);
3410 Chunk
= EmitScalarConversion(Chunk
, ChunkExpr
->getType(),
3411 S
.getIterationVariable()->getType(),
3413 Expr::EvalResult Result
;
3414 if (ChunkExpr
->EvaluateAsInt(Result
, getContext())) {
3415 llvm::APSInt EvaluatedChunk
= Result
.Val
.getInt();
3416 HasChunkSizeOne
= (EvaluatedChunk
.getLimitedValue() == 1);
3419 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
3420 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
3421 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3422 // If the static schedule kind is specified or if the ordered clause is
3423 // specified, and if no monotonic modifier is specified, the effect will
3424 // be as if the monotonic modifier was specified.
3425 bool StaticChunkedOne
=
3426 RT
.isStaticChunked(ScheduleKind
.Schedule
,
3427 /* Chunked */ Chunk
!= nullptr) &&
3429 isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind());
3432 (ScheduleKind
.Schedule
== OMPC_SCHEDULE_static
&&
3433 !(ScheduleKind
.M1
== OMPC_SCHEDULE_MODIFIER_nonmonotonic
||
3434 ScheduleKind
.M2
== OMPC_SCHEDULE_MODIFIER_nonmonotonic
)) ||
3435 ScheduleKind
.M1
== OMPC_SCHEDULE_MODIFIER_monotonic
||
3436 ScheduleKind
.M2
== OMPC_SCHEDULE_MODIFIER_monotonic
;
3437 if ((RT
.isStaticNonchunked(ScheduleKind
.Schedule
,
3438 /* Chunked */ Chunk
!= nullptr) ||
3439 StaticChunkedOne
) &&
3442 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3445 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3446 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
3447 CGF
.EmitOMPSimdInit(S
);
3448 } else if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>()) {
3449 if (C
->getKind() == OMPC_ORDER_concurrent
)
3450 CGF
.LoopStack
.setParallel(/*Enable=*/true);
3453 [IVSize
, IVSigned
, Ordered
, IL
, LB
, UB
, ST
, StaticChunkedOne
, Chunk
,
3454 &S
, ScheduleKind
, LoopExit
,
3455 &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3456 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3457 // When no chunk_size is specified, the iteration space is divided
3458 // into chunks that are approximately equal in size, and at most
3459 // one chunk is distributed to each thread. Note that the size of
3460 // the chunks is unspecified in this case.
3461 CGOpenMPRuntime::StaticRTInput
StaticInit(
3462 IVSize
, IVSigned
, Ordered
, IL
.getAddress(), LB
.getAddress(),
3463 UB
.getAddress(), ST
.getAddress(),
3464 StaticChunkedOne
? Chunk
: nullptr);
3465 CGF
.CGM
.getOpenMPRuntime().emitForStaticInit(
3466 CGF
, S
.getBeginLoc(), S
.getDirectiveKind(), ScheduleKind
,
3468 // UB = min(UB, GlobalUB);
3469 if (!StaticChunkedOne
)
3470 CGF
.EmitIgnoredExpr(S
.getEnsureUpperBound());
3472 CGF
.EmitIgnoredExpr(S
.getInit());
        // For unchunked static schedule generate:
        //
        //  while (idx <= UB) {
        //    BODY;
        //    ++idx;
        //  }
        //
        // For static schedule with chunk one:
        //
        //  while (IV <= PrevUB) {
        //    BODY;
        //    IV += ST;
        //  }
3486 CGF
.EmitOMPInnerLoop(
3487 S
, LoopScope
.requiresCleanups(),
3488 StaticChunkedOne
? S
.getCombinedParForInDistCond()
3490 StaticChunkedOne
? S
.getDistInc() : S
.getInc(),
3491 [&S
, LoopExit
](CodeGenFunction
&CGF
) {
3492 emitOMPLoopBodyWithStopPoint(CGF
, S
, LoopExit
);
3494 [](CodeGenFunction
&) {});
3496 EmitBlock(LoopExit
.getBlock());
3497 // Tell the runtime we are done.
3498 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
) {
3499 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
3502 OMPCancelStack
.emitExit(*this, S
.getDirectiveKind(), CodeGen
);
3504 // Emit the outer loop, which requests its work chunk [LB..UB] from
3505 // runtime and runs the inner loop to process it.
3506 OMPLoopArguments
LoopArguments(LB
.getAddress(), UB
.getAddress(),
3507 ST
.getAddress(), IL
.getAddress(), Chunk
,
3509 LoopArguments
.DKind
= OMPD_for
;
3510 EmitOMPForOuterLoop(ScheduleKind
, IsMonotonic
, S
, LoopScope
, Ordered
,
3511 LoopArguments
, CGDispatchBounds
);
3513 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
3514 EmitOMPSimdFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3515 return CGF
.Builder
.CreateIsNotNull(
3516 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3519 EmitOMPReductionClauseFinal(
3520 S
, /*ReductionKind=*/isOpenMPSimdDirective(S
.getDirectiveKind())
3521 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3522 : /*Parallel only*/ OMPD_parallel
);
3523 // Emit post-update of the reduction variables if IsLastIter != 0.
3524 emitPostUpdateForReductionClause(
3525 *this, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3526 return CGF
.Builder
.CreateIsNotNull(
3527 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3529 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3530 if (HasLastprivateClause
)
3531 EmitOMPLastprivateClauseFinal(
3532 S
, isOpenMPSimdDirective(S
.getDirectiveKind()),
3533 Builder
.CreateIsNotNull(EmitLoadOfScalar(IL
, S
.getBeginLoc())));
3534 LoopScope
.restoreMap();
3535 EmitOMPLinearClauseFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3536 return CGF
.Builder
.CreateIsNotNull(
3537 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3540 DoacrossCleanupScope
.ForceCleanup();
3541 // We're now done with the loop, so jump to the continuation block.
3543 EmitBranch(ContBlock
);
3544 EmitBlock(ContBlock
, /*IsFinished=*/true);
3547 return HasLastprivateClause
;
/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}
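
// For example (illustrative only), for a canonical loop with LastIteration
// == N-1: emitForLoopBounds yields the LB/UB helper variables (initially
// [0, N-1]) that the static init call updates in place, whereas
// emitDispatchForLoopBounds yields the plain values 0 and N-1 that the
// dispatch init call expects.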

/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
3585 static void emitScanBasedDirectiveDecls(
3586 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3587 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
) {
3588 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3589 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3590 SmallVector
<const Expr
*, 4> Shareds
;
3591 SmallVector
<const Expr
*, 4> Privates
;
3592 SmallVector
<const Expr
*, 4> ReductionOps
;
3593 SmallVector
<const Expr
*, 4> CopyArrayTemps
;
3594 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3595 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3596 "Only inscan reductions are expected.");
3597 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
3598 Privates
.append(C
->privates().begin(), C
->privates().end());
3599 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
3600 CopyArrayTemps
.append(C
->copy_array_temps().begin(),
3601 C
->copy_array_temps().end());
3604 // Emit buffers for each reduction variables.
3605 // ReductionCodeGen is required to emit correctly the code for array
3607 ReductionCodeGen
RedCG(Shareds
, Shareds
, Privates
, ReductionOps
);
3609 auto *ITA
= CopyArrayTemps
.begin();
3610 for (const Expr
*IRef
: Privates
) {
3611 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IRef
)->getDecl());
3612 // Emit variably modified arrays, used for arrays/array sections
3614 if (PrivateVD
->getType()->isVariablyModifiedType()) {
3615 RedCG
.emitSharedOrigLValue(CGF
, Count
);
3616 RedCG
.emitAggregateType(CGF
, Count
);
3618 CodeGenFunction::OpaqueValueMapping
DimMapping(
3620 cast
<OpaqueValueExpr
>(
3621 cast
<VariableArrayType
>((*ITA
)->getType()->getAsArrayTypeUnsafe())
3623 RValue::get(OMPScanNumIterations
));
3624 // Emit temp buffer.
3625 CGF
.EmitVarDecl(*cast
<VarDecl
>(cast
<DeclRefExpr
>(*ITA
)->getDecl()));
/// Copies final inscan reduction values to the original variables.
/// The code is the following:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
3637 static void emitScanBasedDirectiveFinals(
3638 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3639 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
) {
3640 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3641 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3642 SmallVector
<const Expr
*, 4> Shareds
;
3643 SmallVector
<const Expr
*, 4> LHSs
;
3644 SmallVector
<const Expr
*, 4> RHSs
;
3645 SmallVector
<const Expr
*, 4> Privates
;
3646 SmallVector
<const Expr
*, 4> CopyOps
;
3647 SmallVector
<const Expr
*, 4> CopyArrayElems
;
3648 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3649 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3650 "Only inscan reductions are expected.");
3651 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
3652 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
3653 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
3654 Privates
.append(C
->privates().begin(), C
->privates().end());
3655 CopyOps
.append(C
->copy_ops().begin(), C
->copy_ops().end());
3656 CopyArrayElems
.append(C
->copy_array_elems().begin(),
3657 C
->copy_array_elems().end());
3659 // Create temp var and copy LHS value to this temp value.
3660 // LHS = TMP[LastIter];
3661 llvm::Value
*OMPLast
= CGF
.Builder
.CreateNSWSub(
3662 OMPScanNumIterations
,
3663 llvm::ConstantInt::get(CGF
.SizeTy
, 1, /*isSigned=*/false));
3664 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
3665 const Expr
*PrivateExpr
= Privates
[I
];
3666 const Expr
*OrigExpr
= Shareds
[I
];
3667 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
3668 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3670 cast
<OpaqueValueExpr
>(
3671 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3672 RValue::get(OMPLast
));
3673 LValue DestLVal
= CGF
.EmitLValue(OrigExpr
);
3674 LValue SrcLVal
= CGF
.EmitLValue(CopyArrayElem
);
3676 PrivateExpr
->getType(), DestLVal
.getAddress(), SrcLVal
.getAddress(),
3677 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
3678 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()), CopyOps
[I
]);
/// Emits the code for the directive with inscan reductions.
/// The code is the following:
/// \code
/// #pragma omp ...
/// for (i: 0..<num_iters>) {
///   <input phase>;
///   buffer[i] = red;
/// }
/// #pragma omp master // in parallel region
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///   buffer[cnt] op= buffer[cnt-pow(2,k)];
/// #pragma omp barrier // in parallel region
/// #pragma omp ...
/// for (0..<num_iters>) {
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
///   <scan phase>;
/// }
/// \endcode
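/// A source-level shape this lowering supports looks like (example only;
/// variable names are arbitrary):
/// \code
/// #pragma omp for reduction(inscan, +:sum)
/// for (int i = 0; i < n; ++i) {
///   sum += a[i];                    // input phase
///   #pragma omp scan inclusive(sum)
///   b[i] = sum;                     // scan phase
/// }
/// \endcode
/// The FirstGen/SecondGen callbacks passed in below emit the input and scan
/// phases; the buffer[] temporaries from emitScanBasedDirectiveDecls carry
/// the per-iteration partial reductions between them.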
3701 static void emitScanBasedDirective(
3702 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3703 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
,
3704 llvm::function_ref
<void(CodeGenFunction
&)> FirstGen
,
3705 llvm::function_ref
<void(CodeGenFunction
&)> SecondGen
) {
3706 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3707 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3708 SmallVector
<const Expr
*, 4> Privates
;
3709 SmallVector
<const Expr
*, 4> ReductionOps
;
3710 SmallVector
<const Expr
*, 4> LHSs
;
3711 SmallVector
<const Expr
*, 4> RHSs
;
3712 SmallVector
<const Expr
*, 4> CopyArrayElems
;
3713 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3714 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3715 "Only inscan reductions are expected.");
3716 Privates
.append(C
->privates().begin(), C
->privates().end());
3717 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
3718 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
3719 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
3720 CopyArrayElems
.append(C
->copy_array_elems().begin(),
3721 C
->copy_array_elems().end());
3723 CodeGenFunction::ParentLoopDirectiveForScanRegion
ScanRegion(CGF
, S
);
3725 // Emit loop with input phase:
3727 // for (i: 0..<num_iters>) {
3731 CGF
.OMPFirstScanLoop
= true;
3732 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
3735 // #pragma omp barrier // in parallel region
3736 auto &&CodeGen
= [&S
, OMPScanNumIterations
, &LHSs
, &RHSs
, &CopyArrayElems
,
3738 &Privates
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
3740 // Emit prefix reduction:
3741 // #pragma omp master // in parallel region
3742 // for (int k = 0; k <= ceil(log2(n)); ++k)
3743 llvm::BasicBlock
*InputBB
= CGF
.Builder
.GetInsertBlock();
3744 llvm::BasicBlock
*LoopBB
= CGF
.createBasicBlock("omp.outer.log.scan.body");
3745 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock("omp.outer.log.scan.exit");
3747 CGF
.CGM
.getIntrinsic(llvm::Intrinsic::log2
, CGF
.DoubleTy
);
3749 CGF
.Builder
.CreateUIToFP(OMPScanNumIterations
, CGF
.DoubleTy
);
3750 llvm::Value
*LogVal
= CGF
.EmitNounwindRuntimeCall(F
, Arg
);
3751 F
= CGF
.CGM
.getIntrinsic(llvm::Intrinsic::ceil
, CGF
.DoubleTy
);
3752 LogVal
= CGF
.EmitNounwindRuntimeCall(F
, LogVal
);
3753 LogVal
= CGF
.Builder
.CreateFPToUI(LogVal
, CGF
.IntTy
);
3754 llvm::Value
*NMin1
= CGF
.Builder
.CreateNUWSub(
3755 OMPScanNumIterations
, llvm::ConstantInt::get(CGF
.SizeTy
, 1));
3756 auto DL
= ApplyDebugLocation::CreateDefaultArtificial(CGF
, S
.getBeginLoc());
3757 CGF
.EmitBlock(LoopBB
);
3758 auto *Counter
= CGF
.Builder
.CreatePHI(CGF
.IntTy
, 2);
3760 auto *Pow2K
= CGF
.Builder
.CreatePHI(CGF
.SizeTy
, 2);
3761 Counter
->addIncoming(llvm::ConstantInt::get(CGF
.IntTy
, 0), InputBB
);
3762 Pow2K
->addIncoming(llvm::ConstantInt::get(CGF
.SizeTy
, 1), InputBB
);
3763 // for (size i = n - 1; i >= 2 ^ k; --i)
3764 // tmp[i] op= tmp[i-pow2k];
3765 llvm::BasicBlock
*InnerLoopBB
=
3766 CGF
.createBasicBlock("omp.inner.log.scan.body");
3767 llvm::BasicBlock
*InnerExitBB
=
3768 CGF
.createBasicBlock("omp.inner.log.scan.exit");
3769 llvm::Value
*CmpI
= CGF
.Builder
.CreateICmpUGE(NMin1
, Pow2K
);
3770 CGF
.Builder
.CreateCondBr(CmpI
, InnerLoopBB
, InnerExitBB
);
3771 CGF
.EmitBlock(InnerLoopBB
);
3772 auto *IVal
= CGF
.Builder
.CreatePHI(CGF
.SizeTy
, 2);
3773 IVal
->addIncoming(NMin1
, LoopBB
);
3775 CodeGenFunction::OMPPrivateScope
PrivScope(CGF
);
3776 auto *ILHS
= LHSs
.begin();
3777 auto *IRHS
= RHSs
.begin();
3778 for (const Expr
*CopyArrayElem
: CopyArrayElems
) {
3779 const auto *LHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*ILHS
)->getDecl());
3780 const auto *RHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRHS
)->getDecl());
3781 Address LHSAddr
= Address::invalid();
3783 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3785 cast
<OpaqueValueExpr
>(
3786 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3788 LHSAddr
= CGF
.EmitLValue(CopyArrayElem
).getAddress();
3790 PrivScope
.addPrivate(LHSVD
, LHSAddr
);
3791 Address RHSAddr
= Address::invalid();
3793 llvm::Value
*OffsetIVal
= CGF
.Builder
.CreateNUWSub(IVal
, Pow2K
);
3794 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3796 cast
<OpaqueValueExpr
>(
3797 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3798 RValue::get(OffsetIVal
));
3799 RHSAddr
= CGF
.EmitLValue(CopyArrayElem
).getAddress();
3801 PrivScope
.addPrivate(RHSVD
, RHSAddr
);
3805 PrivScope
.Privatize();
3806 CGF
.CGM
.getOpenMPRuntime().emitReduction(
3807 CGF
, S
.getEndLoc(), Privates
, LHSs
, RHSs
, ReductionOps
,
3808 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown
});
3810 llvm::Value
*NextIVal
=
3811 CGF
.Builder
.CreateNUWSub(IVal
, llvm::ConstantInt::get(CGF
.SizeTy
, 1));
3812 IVal
->addIncoming(NextIVal
, CGF
.Builder
.GetInsertBlock());
3813 CmpI
= CGF
.Builder
.CreateICmpUGE(NextIVal
, Pow2K
);
3814 CGF
.Builder
.CreateCondBr(CmpI
, InnerLoopBB
, InnerExitBB
);
3815 CGF
.EmitBlock(InnerExitBB
);
3817 CGF
.Builder
.CreateNUWAdd(Counter
, llvm::ConstantInt::get(CGF
.IntTy
, 1));
3818 Counter
->addIncoming(Next
, CGF
.Builder
.GetInsertBlock());
3820 llvm::Value
*NextPow2K
=
3821 CGF
.Builder
.CreateShl(Pow2K
, 1, "", /*HasNUW=*/true);
3822 Pow2K
->addIncoming(NextPow2K
, CGF
.Builder
.GetInsertBlock());
3823 llvm::Value
*Cmp
= CGF
.Builder
.CreateICmpNE(Next
, LogVal
);
3824 CGF
.Builder
.CreateCondBr(Cmp
, LoopBB
, ExitBB
);
3825 auto DL1
= ApplyDebugLocation::CreateDefaultArtificial(CGF
, S
.getEndLoc());
3826 CGF
.EmitBlock(ExitBB
);
3828 if (isOpenMPParallelDirective(S
.getDirectiveKind())) {
3829 CGF
.CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, CodeGen
, S
.getBeginLoc());
3830 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
3831 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
3832 /*ForceSimpleCall=*/true);
3834 RegionCodeGenTy
RCG(CodeGen
);
3838 CGF
.OMPFirstScanLoop
= false;
3842 static bool emitWorksharingDirective(CodeGenFunction
&CGF
,
3843 const OMPLoopDirective
&S
,
3845 bool HasLastprivates
;
3846 if (llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
3847 [](const OMPReductionClause
*C
) {
3848 return C
->getModifier() == OMPC_REDUCTION_inscan
;
3850 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
3851 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
3852 OMPLoopScope
LoopScope(CGF
, S
);
3853 return CGF
.EmitScalarExpr(S
.getNumIterations());
3855 const auto &&FirstGen
= [&S
, HasCancel
](CodeGenFunction
&CGF
) {
3856 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
3857 CGF
, S
.getDirectiveKind(), HasCancel
);
3858 (void)CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3860 emitDispatchForLoopBounds
);
3861 // Emit an implicit barrier at the end.
3862 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(CGF
, S
.getBeginLoc(),
3865 const auto &&SecondGen
= [&S
, HasCancel
,
3866 &HasLastprivates
](CodeGenFunction
&CGF
) {
3867 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
3868 CGF
, S
.getDirectiveKind(), HasCancel
);
3869 HasLastprivates
= CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3871 emitDispatchForLoopBounds
);
3873 if (!isOpenMPParallelDirective(S
.getDirectiveKind()))
3874 emitScanBasedDirectiveDecls(CGF
, S
, NumIteratorsGen
);
3875 emitScanBasedDirective(CGF
, S
, NumIteratorsGen
, FirstGen
, SecondGen
);
3876 if (!isOpenMPParallelDirective(S
.getDirectiveKind()))
3877 emitScanBasedDirectiveFinals(CGF
, S
, NumIteratorsGen
);
3879 CodeGenFunction::OMPCancelStackRAII
CancelRegion(CGF
, S
.getDirectiveKind(),
3881 HasLastprivates
= CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3883 emitDispatchForLoopBounds
);
3885 return HasLastprivates
;
3888 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective
&S
) {
3891 for (OMPClause
*C
: S
.clauses()) {
3892 if (isa
<OMPNowaitClause
>(C
))
3895 if (auto *SC
= dyn_cast
<OMPScheduleClause
>(C
)) {
3896 if (SC
->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown
)
3898 if (SC
->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown
)
3900 switch (SC
->getScheduleKind()) {
3901 case OMPC_SCHEDULE_auto
:
3902 case OMPC_SCHEDULE_dynamic
:
3903 case OMPC_SCHEDULE_runtime
:
3904 case OMPC_SCHEDULE_guided
:
3905 case OMPC_SCHEDULE_static
:
3907 case OMPC_SCHEDULE_unknown
:
static llvm::omp::ScheduleKind
convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
  switch (ScheduleClauseKind) {
  case OMPC_SCHEDULE_unknown:
    return llvm::omp::OMP_SCHEDULE_Default;
  case OMPC_SCHEDULE_auto:
    return llvm::omp::OMP_SCHEDULE_Auto;
  case OMPC_SCHEDULE_dynamic:
    return llvm::omp::OMP_SCHEDULE_Dynamic;
  case OMPC_SCHEDULE_guided:
    return llvm::omp::OMP_SCHEDULE_Guided;
  case OMPC_SCHEDULE_runtime:
    return llvm::omp::OMP_SCHEDULE_Runtime;
  case OMPC_SCHEDULE_static:
    return llvm::omp::OMP_SCHEDULE_Static;
  }
  llvm_unreachable("Unhandled schedule kind");
}
3937 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective
&S
) {
3938 bool HasLastprivates
= false;
3939 bool UseOMPIRBuilder
=
3940 CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
);
3941 auto &&CodeGen
= [this, &S
, &HasLastprivates
,
3942 UseOMPIRBuilder
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3943 // Use the OpenMPIRBuilder if enabled.
3944 if (UseOMPIRBuilder
) {
3945 bool NeedsBarrier
= !S
.getSingleClause
<OMPNowaitClause
>();
3947 llvm::omp::ScheduleKind SchedKind
= llvm::omp::OMP_SCHEDULE_Default
;
3948 llvm::Value
*ChunkSize
= nullptr;
3949 if (auto *SchedClause
= S
.getSingleClause
<OMPScheduleClause
>()) {
3951 convertClauseKindToSchedKind(SchedClause
->getScheduleKind());
3952 if (const Expr
*ChunkSizeExpr
= SchedClause
->getChunkSize())
3953 ChunkSize
= EmitScalarExpr(ChunkSizeExpr
);
3956 // Emit the associated statement and get its loop representation.
3957 const Stmt
*Inner
= S
.getRawStmt();
3958 llvm::CanonicalLoopInfo
*CLI
=
3959 EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
3961 llvm::OpenMPIRBuilder
&OMPBuilder
=
3962 CGM
.getOpenMPRuntime().getOMPBuilder();
3963 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
3964 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
3965 OMPBuilder
.applyWorkshareLoop(
3966 Builder
.getCurrentDebugLocation(), CLI
, AllocaIP
, NeedsBarrier
,
3967 SchedKind
, ChunkSize
, /*HasSimdModifier=*/false,
3968 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
3969 /*HasOrderedClause=*/false);
3973 HasLastprivates
= emitWorksharingDirective(CGF
, S
, S
.hasCancel());
3977 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
3978 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3979 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for
, CodeGen
,
3983 if (!UseOMPIRBuilder
) {
3984 // Emit an implicit barrier at the end.
3985 if (!S
.getSingleClause
<OMPNowaitClause
>() || HasLastprivates
)
3986 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_for
);
3988 // Check for outer lastprivate conditional update.
3989 checkForLastprivateConditionalUpdate(*this, S
);
3992 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective
&S
) {
3993 bool HasLastprivates
= false;
3994 auto &&CodeGen
= [&S
, &HasLastprivates
](CodeGenFunction
&CGF
,
3995 PrePostActionTy
&) {
3996 HasLastprivates
= emitWorksharingDirective(CGF
, S
, /*HasCancel=*/false);
4000 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4001 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4002 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
4005 // Emit an implicit barrier at the end.
4006 if (!S
.getSingleClause
<OMPNowaitClause
>() || HasLastprivates
)
4007 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_for
);
4008 // Check for outer lastprivate conditional update.
4009 checkForLastprivateConditionalUpdate(*this, S
);
4012 static LValue
createSectionLVal(CodeGenFunction
&CGF
, QualType Ty
,
4014 llvm::Value
*Init
= nullptr) {
4015 LValue LVal
= CGF
.MakeAddrLValue(CGF
.CreateMemTemp(Ty
, Name
), Ty
);
4017 CGF
.EmitStoreThroughLValue(RValue::get(Init
), LVal
, /*isInit*/ true);
4021 void CodeGenFunction::EmitSections(const OMPExecutableDirective
&S
) {
4022 const Stmt
*CapturedStmt
= S
.getInnermostCapturedStmt()->getCapturedStmt();
4023 const auto *CS
= dyn_cast
<CompoundStmt
>(CapturedStmt
);
4024 bool HasLastprivates
= false;
4025 auto &&CodeGen
= [&S
, CapturedStmt
, CS
,
4026 &HasLastprivates
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
4027 const ASTContext
&C
= CGF
.getContext();
4028 QualType KmpInt32Ty
=
4029 C
.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4030 // Emit helper vars inits.
4031 LValue LB
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.lb.",
4032 CGF
.Builder
.getInt32(0));
4033 llvm::ConstantInt
*GlobalUBVal
= CS
!= nullptr
4034 ? CGF
.Builder
.getInt32(CS
->size() - 1)
4035 : CGF
.Builder
.getInt32(0);
4037 createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.ub.", GlobalUBVal
);
4038 LValue ST
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.st.",
4039 CGF
.Builder
.getInt32(1));
4040 LValue IL
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.il.",
4041 CGF
.Builder
.getInt32(0));
4043 LValue IV
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.iv.");
4044 OpaqueValueExpr
IVRefExpr(S
.getBeginLoc(), KmpInt32Ty
, VK_LValue
);
4045 CodeGenFunction::OpaqueValueMapping
OpaqueIV(CGF
, &IVRefExpr
, IV
);
4046 OpaqueValueExpr
UBRefExpr(S
.getBeginLoc(), KmpInt32Ty
, VK_LValue
);
4047 CodeGenFunction::OpaqueValueMapping
OpaqueUB(CGF
, &UBRefExpr
, UB
);
4048 // Generate condition for loop.
4049 BinaryOperator
*Cond
= BinaryOperator::Create(
4050 C
, &IVRefExpr
, &UBRefExpr
, BO_LE
, C
.BoolTy
, VK_PRValue
, OK_Ordinary
,
4051 S
.getBeginLoc(), FPOptionsOverride());
4052 // Increment for loop counter.
4053 UnaryOperator
*Inc
= UnaryOperator::Create(
4054 C
, &IVRefExpr
, UO_PreInc
, KmpInt32Ty
, VK_PRValue
, OK_Ordinary
,
4055 S
.getBeginLoc(), true, FPOptionsOverride());
4056 auto &&BodyGen
= [CapturedStmt
, CS
, &S
, &IV
](CodeGenFunction
&CGF
) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
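      //
      // For instance (sketch only; Work0/Work1 are arbitrary names), with two
      // sections the body generated below behaves like:
      //
      //   switch (IV) {
      //   case 0: Work0(); break;
      //   case 1: Work1(); break;
      //   default: break;
      //   }
      //
      // while the static worksharing initialization emitted later in this
      // function hands each thread a sub-range of [0, <NumSection> - 1].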
4068 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock(".omp.sections.exit");
4069 llvm::SwitchInst
*SwitchStmt
=
4070 CGF
.Builder
.CreateSwitch(CGF
.EmitLoadOfScalar(IV
, S
.getBeginLoc()),
4071 ExitBB
, CS
== nullptr ? 1 : CS
->size());
4073 unsigned CaseNumber
= 0;
4074 for (const Stmt
*SubStmt
: CS
->children()) {
4075 auto CaseBB
= CGF
.createBasicBlock(".omp.sections.case");
4076 CGF
.EmitBlock(CaseBB
);
4077 SwitchStmt
->addCase(CGF
.Builder
.getInt32(CaseNumber
), CaseBB
);
4078 CGF
.EmitStmt(SubStmt
);
4079 CGF
.EmitBranch(ExitBB
);
4083 llvm::BasicBlock
*CaseBB
= CGF
.createBasicBlock(".omp.sections.case");
4084 CGF
.EmitBlock(CaseBB
);
4085 SwitchStmt
->addCase(CGF
.Builder
.getInt32(0), CaseBB
);
4086 CGF
.EmitStmt(CapturedStmt
);
4087 CGF
.EmitBranch(ExitBB
);
4089 CGF
.EmitBlock(ExitBB
, /*IsFinished=*/true);
4092 CodeGenFunction::OMPPrivateScope
LoopScope(CGF
);
4093 if (CGF
.EmitOMPFirstprivateClause(S
, LoopScope
)) {
4094 // Emit implicit barrier to synchronize threads and avoid data races on
4095 // initialization of firstprivate variables and post-update of lastprivate
4097 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
4098 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
4099 /*ForceSimpleCall=*/true);
4101 CGF
.EmitOMPPrivateClause(S
, LoopScope
);
4102 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(CGF
, S
, IV
);
4103 HasLastprivates
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
4104 CGF
.EmitOMPReductionClauseInit(S
, LoopScope
);
4105 (void)LoopScope
.Privatize();
4106 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
4107 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
4109 // Emit static non-chunked loop.
4110 OpenMPScheduleTy ScheduleKind
;
4111 ScheduleKind
.Schedule
= OMPC_SCHEDULE_static
;
4112 CGOpenMPRuntime::StaticRTInput
StaticInit(
4113 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL
.getAddress(),
4114 LB
.getAddress(), UB
.getAddress(), ST
.getAddress());
4115 CGF
.CGM
.getOpenMPRuntime().emitForStaticInit(
4116 CGF
, S
.getBeginLoc(), S
.getDirectiveKind(), ScheduleKind
, StaticInit
);
4117 // UB = min(UB, GlobalUB);
4118 llvm::Value
*UBVal
= CGF
.EmitLoadOfScalar(UB
, S
.getBeginLoc());
4119 llvm::Value
*MinUBGlobalUB
= CGF
.Builder
.CreateSelect(
4120 CGF
.Builder
.CreateICmpSLT(UBVal
, GlobalUBVal
), UBVal
, GlobalUBVal
);
4121 CGF
.EmitStoreOfScalar(MinUBGlobalUB
, UB
);
4123 CGF
.EmitStoreOfScalar(CGF
.EmitLoadOfScalar(LB
, S
.getBeginLoc()), IV
);
4124 // while (idx <= UB) { BODY; ++idx; }
4125 CGF
.EmitOMPInnerLoop(S
, /*RequiresCleanup=*/false, Cond
, Inc
, BodyGen
,
4126 [](CodeGenFunction
&) {});
4127 // Tell the runtime we are done.
4128 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
) {
4129 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
4132 CGF
.OMPCancelStack
.emitExit(CGF
, S
.getDirectiveKind(), CodeGen
);
4133 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4134 // Emit post-update of the reduction variables if IsLastIter != 0.
4135 emitPostUpdateForReductionClause(CGF
, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
4136 return CGF
.Builder
.CreateIsNotNull(
4137 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
4140 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4141 if (HasLastprivates
)
4142 CGF
.EmitOMPLastprivateClauseFinal(
4143 S
, /*NoFinals=*/false,
4144 CGF
.Builder
.CreateIsNotNull(
4145 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc())));
4148 bool HasCancel
= false;
4149 if (auto *OSD
= dyn_cast
<OMPSectionsDirective
>(&S
))
4150 HasCancel
= OSD
->hasCancel();
4151 else if (auto *OPSD
= dyn_cast
<OMPParallelSectionsDirective
>(&S
))
4152 HasCancel
= OPSD
->hasCancel();
4153 OMPCancelStackRAII
CancelRegion(*this, S
.getDirectiveKind(), HasCancel
);
4154 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections
, CodeGen
,
4156 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4157 // clause. Otherwise the barrier will be generated by the codegen for the
4159 if (HasLastprivates
&& S
.getSingleClause
<OMPNowaitClause
>()) {
4160 // Emit implicit barrier to synchronize threads and avoid data races on
4161 // initialization of firstprivate variables.
4162 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(),
4167 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective
&S
) {
4168 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4169 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4170 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4171 using BodyGenCallbackTy
= llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy
;
4173 auto FiniCB
= [this](InsertPointTy IP
) {
4174 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4177 const CapturedStmt
*ICS
= S
.getInnermostCapturedStmt();
4178 const Stmt
*CapturedStmt
= S
.getInnermostCapturedStmt()->getCapturedStmt();
4179 const auto *CS
= dyn_cast
<CompoundStmt
>(CapturedStmt
);
4180 llvm::SmallVector
<BodyGenCallbackTy
, 4> SectionCBVector
;
4182 for (const Stmt
*SubStmt
: CS
->children()) {
4183 auto SectionCB
= [this, SubStmt
](InsertPointTy AllocaIP
,
4184 InsertPointTy CodeGenIP
) {
4185 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4186 *this, SubStmt
, AllocaIP
, CodeGenIP
, "section");
4188 SectionCBVector
.push_back(SectionCB
);
4191 auto SectionCB
= [this, CapturedStmt
](InsertPointTy AllocaIP
,
4192 InsertPointTy CodeGenIP
) {
4193 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4194 *this, CapturedStmt
, AllocaIP
, CodeGenIP
, "section");
4196 SectionCBVector
.push_back(SectionCB
);
4199 // Privatization callback that performs appropriate action for
4200 // shared/private/firstprivate/lastprivate/copyin/... variables.
4202 // TODO: This defaults to shared right now.
4203 auto PrivCB
= [](InsertPointTy AllocaIP
, InsertPointTy CodeGenIP
,
4204 llvm::Value
&, llvm::Value
&Val
, llvm::Value
*&ReplVal
) {
4205 // The next line is appropriate only for variables (Val) with the
4206 // data-sharing attribute "shared".
4212 CGCapturedStmtInfo
CGSI(*ICS
, CR_OpenMP
);
4213 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(*this, &CGSI
);
4214 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
4215 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
4216 Builder
.restoreIP(OMPBuilder
.createSections(
4217 Builder
, AllocaIP
, SectionCBVector
, PrivCB
, FiniCB
, S
.hasCancel(),
4218 S
.getSingleClause
<OMPNowaitClause
>()));
4223 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4224 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4227 // Emit an implicit barrier at the end.
4228 if (!S
.getSingleClause
<OMPNowaitClause
>()) {
4229 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(),
4232 // Check for outer lastprivate conditional update.
4233 checkForLastprivateConditionalUpdate(*this, S
);
4236 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective
&S
) {
4237 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4238 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4239 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4241 const Stmt
*SectionRegionBodyStmt
= S
.getAssociatedStmt();
4242 auto FiniCB
= [this](InsertPointTy IP
) {
4243 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4246 auto BodyGenCB
= [SectionRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4247 InsertPointTy CodeGenIP
) {
4248 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4249 *this, SectionRegionBodyStmt
, AllocaIP
, CodeGenIP
, "section");
4252 LexicalScope
Scope(*this, S
.getSourceRange());
4254 Builder
.restoreIP(OMPBuilder
.createSection(Builder
, BodyGenCB
, FiniCB
));
4258 LexicalScope
Scope(*this, S
.getSourceRange());
4260 EmitStmt(S
.getAssociatedStmt());
4263 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective
&S
) {
4264 llvm::SmallVector
<const Expr
*, 8> CopyprivateVars
;
4265 llvm::SmallVector
<const Expr
*, 8> DestExprs
;
4266 llvm::SmallVector
<const Expr
*, 8> SrcExprs
;
4267 llvm::SmallVector
<const Expr
*, 8> AssignmentOps
;
4268 // Check if there are any 'copyprivate' clauses associated with this
4269 // 'single' construct.
4270 // Build a list of copyprivate variables along with helper expressions
4271 // (<source>, <destination>, <destination>=<source> expressions)
4272 for (const auto *C
: S
.getClausesOfKind
<OMPCopyprivateClause
>()) {
4273 CopyprivateVars
.append(C
->varlists().begin(), C
->varlists().end());
4274 DestExprs
.append(C
->destination_exprs().begin(),
4275 C
->destination_exprs().end());
4276 SrcExprs
.append(C
->source_exprs().begin(), C
->source_exprs().end());
4277 AssignmentOps
.append(C
->assignment_ops().begin(),
4278 C
->assignment_ops().end());
4280 // Emit code for 'single' region along with 'copyprivate' clauses
4281 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4283 OMPPrivateScope
SingleScope(CGF
);
4284 (void)CGF
.EmitOMPFirstprivateClause(S
, SingleScope
);
4285 CGF
.EmitOMPPrivateClause(S
, SingleScope
);
4286 (void)SingleScope
.Privatize();
4287 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
4291 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4292 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4293 CGM
.getOpenMPRuntime().emitSingleRegion(*this, CodeGen
, S
.getBeginLoc(),
4294 CopyprivateVars
, DestExprs
,
4295 SrcExprs
, AssignmentOps
);
4297 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4298 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4299 if (!S
.getSingleClause
<OMPNowaitClause
>() && CopyprivateVars
.empty()) {
4300 CGM
.getOpenMPRuntime().emitBarrierCall(
4301 *this, S
.getBeginLoc(),
4302 S
.getSingleClause
<OMPNowaitClause
>() ? OMPD_unknown
: OMPD_single
);
4304 // Check for outer lastprivate conditional update.
4305 checkForLastprivateConditionalUpdate(*this, S
);
4308 static void emitMaster(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
4309 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4311 CGF
.EmitStmt(S
.getRawStmt());
4313 CGF
.CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, CodeGen
, S
.getBeginLoc());
4316 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective
&S
) {
4317 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4318 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4319 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4321 const Stmt
*MasterRegionBodyStmt
= S
.getAssociatedStmt();
4323 auto FiniCB
= [this](InsertPointTy IP
) {
4324 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4327 auto BodyGenCB
= [MasterRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4328 InsertPointTy CodeGenIP
) {
4329 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4330 *this, MasterRegionBodyStmt
, AllocaIP
, CodeGenIP
, "master");
4333 LexicalScope
Scope(*this, S
.getSourceRange());
4335 Builder
.restoreIP(OMPBuilder
.createMaster(Builder
, BodyGenCB
, FiniCB
));
4339 LexicalScope
Scope(*this, S
.getSourceRange());
4341 emitMaster(*this, S
);
4344 static void emitMasked(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
4345 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4347 CGF
.EmitStmt(S
.getRawStmt());
4349 Expr
*Filter
= nullptr;
4350 if (const auto *FilterClause
= S
.getSingleClause
<OMPFilterClause
>())
4351 Filter
= FilterClause
->getThreadID();
4352 CGF
.CGM
.getOpenMPRuntime().emitMaskedRegion(CGF
, CodeGen
, S
.getBeginLoc(),
4356 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective
&S
) {
4357 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4358 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4359 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4361 const Stmt
*MaskedRegionBodyStmt
= S
.getAssociatedStmt();
4362 const Expr
*Filter
= nullptr;
4363 if (const auto *FilterClause
= S
.getSingleClause
<OMPFilterClause
>())
4364 Filter
= FilterClause
->getThreadID();
4365 llvm::Value
*FilterVal
= Filter
4366 ? EmitScalarExpr(Filter
, CGM
.Int32Ty
)
4367 : llvm::ConstantInt::get(CGM
.Int32Ty
, /*V=*/0);
4369 auto FiniCB
= [this](InsertPointTy IP
) {
4370 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4373 auto BodyGenCB
= [MaskedRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4374 InsertPointTy CodeGenIP
) {
4375 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4376 *this, MaskedRegionBodyStmt
, AllocaIP
, CodeGenIP
, "masked");
4379 LexicalScope
Scope(*this, S
.getSourceRange());
4382 OMPBuilder
.createMasked(Builder
, BodyGenCB
, FiniCB
, FilterVal
));
4386 LexicalScope
Scope(*this, S
.getSourceRange());
4388 emitMasked(*this, S
);
4391 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective
&S
) {
4392 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4393 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4394 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4396 const Stmt
*CriticalRegionBodyStmt
= S
.getAssociatedStmt();
4397 const Expr
*Hint
= nullptr;
4398 if (const auto *HintClause
= S
.getSingleClause
<OMPHintClause
>())
4399 Hint
= HintClause
->getHint();
4401 // TODO: This is slightly different from what's currently being done in
4402 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4403 // about typing is final.
4404 llvm::Value
*HintInst
= nullptr;
4407 Builder
.CreateIntCast(EmitScalarExpr(Hint
), CGM
.Int32Ty
, false);
4409 auto FiniCB
= [this](InsertPointTy IP
) {
4410 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4413 auto BodyGenCB
= [CriticalRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4414 InsertPointTy CodeGenIP
) {
4415 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4416 *this, CriticalRegionBodyStmt
, AllocaIP
, CodeGenIP
, "critical");
4419 LexicalScope
Scope(*this, S
.getSourceRange());
4421 Builder
.restoreIP(OMPBuilder
.createCritical(
4422 Builder
, BodyGenCB
, FiniCB
, S
.getDirectiveName().getAsString(),
4428 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4430 CGF
.EmitStmt(S
.getAssociatedStmt());
4432 const Expr
*Hint
= nullptr;
4433 if (const auto *HintClause
= S
.getSingleClause
<OMPHintClause
>())
4434 Hint
= HintClause
->getHint();
4435 LexicalScope
Scope(*this, S
.getSourceRange());
4437 CGM
.getOpenMPRuntime().emitCriticalRegion(*this,
4438 S
.getDirectiveName().getAsString(),
4439 CodeGen
, S
.getBeginLoc(), Hint
);
4442 void CodeGenFunction::EmitOMPParallelForDirective(
4443 const OMPParallelForDirective
&S
) {
4444 // Emit directive as a combined directive that consists of two implicit
4445 // directives: 'parallel' with 'for' directive.
4446 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4448 emitOMPCopyinClause(CGF
, S
);
4449 (void)emitWorksharingDirective(CGF
, S
, S
.hasCancel());
4452 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
4453 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
4454 CGCapturedStmtInfo
CGSI(CR_OpenMP
);
4455 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGSI
);
4456 OMPLoopScope
LoopScope(CGF
, S
);
4457 return CGF
.EmitScalarExpr(S
.getNumIterations());
4459 bool IsInscan
= llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
4460 [](const OMPReductionClause
*C
) {
4461 return C
->getModifier() == OMPC_REDUCTION_inscan
;
4464 emitScanBasedDirectiveDecls(*this, S
, NumIteratorsGen
);
4466 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4467 emitCommonOMPParallelDirective(*this, S
, OMPD_for
, CodeGen
,
4468 emitEmptyBoundParameters
);
4470 emitScanBasedDirectiveFinals(*this, S
, NumIteratorsGen
);
4472 // Check for outer lastprivate conditional update.
4473 checkForLastprivateConditionalUpdate(*this, S
);
4476 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4477 const OMPParallelForSimdDirective
&S
) {
4478 // Emit directive as a combined directive that consists of two implicit
4479 // directives: 'parallel' with 'for' directive.
4480 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4482 emitOMPCopyinClause(CGF
, S
);
4483 (void)emitWorksharingDirective(CGF
, S
, /*HasCancel=*/false);
4486 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
4487 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
4488 CGCapturedStmtInfo
CGSI(CR_OpenMP
);
4489 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGSI
);
4490 OMPLoopScope
LoopScope(CGF
, S
);
4491 return CGF
.EmitScalarExpr(S
.getNumIterations());
4493 bool IsInscan
= llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
4494 [](const OMPReductionClause
*C
) {
4495 return C
->getModifier() == OMPC_REDUCTION_inscan
;
4498 emitScanBasedDirectiveDecls(*this, S
, NumIteratorsGen
);
4500 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4501 emitCommonOMPParallelDirective(*this, S
, OMPD_for_simd
, CodeGen
,
4502 emitEmptyBoundParameters
);
4504 emitScanBasedDirectiveFinals(*this, S
, NumIteratorsGen
);
4506 // Check for outer lastprivate conditional update.
4507 checkForLastprivateConditionalUpdate(*this, S
);
4510 void CodeGenFunction::EmitOMPParallelMasterDirective(
4511 const OMPParallelMasterDirective
&S
) {
4512 // Emit directive as a combined directive that consists of two implicit
4513 // directives: 'parallel' with 'master' directive.
4514 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4516 OMPPrivateScope
PrivateScope(CGF
);
4517 emitOMPCopyinClause(CGF
, S
);
4518 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
4519 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
4520 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
4521 (void)PrivateScope
.Privatize();
4523 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4527 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4528 emitCommonOMPParallelDirective(*this, S
, OMPD_master
, CodeGen
,
4529 emitEmptyBoundParameters
);
4530 emitPostUpdateForReductionClause(*this, S
,
4531 [](CodeGenFunction
&) { return nullptr; });
4533 // Check for outer lastprivate conditional update.
4534 checkForLastprivateConditionalUpdate(*this, S
);
4537 void CodeGenFunction::EmitOMPParallelMaskedDirective(
4538 const OMPParallelMaskedDirective
&S
) {
4539 // Emit directive as a combined directive that consists of two implicit
4540 // directives: 'parallel' with 'masked' directive.
4541 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4543 OMPPrivateScope
PrivateScope(CGF
);
4544 emitOMPCopyinClause(CGF
, S
);
4545 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
4546 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
4547 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
4548 (void)PrivateScope
.Privatize();
4550 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4554 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4555 emitCommonOMPParallelDirective(*this, S
, OMPD_masked
, CodeGen
,
4556 emitEmptyBoundParameters
);
4557 emitPostUpdateForReductionClause(*this, S
,
4558 [](CodeGenFunction
&) { return nullptr; });
4560 // Check for outer lastprivate conditional update.
4561 checkForLastprivateConditionalUpdate(*this, S
);
4564 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4565 const OMPParallelSectionsDirective
&S
) {
4566 // Emit directive as a combined directive that consists of two implicit
4567 // directives: 'parallel' with 'sections' directive.
4568 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4570 emitOMPCopyinClause(CGF
, S
);
4571 CGF
.EmitSections(S
);
4575 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4576 emitCommonOMPParallelDirective(*this, S
, OMPD_sections
, CodeGen
,
4577 emitEmptyBoundParameters
);
4579 // Check for outer lastprivate conditional update.
4580 checkForLastprivateConditionalUpdate(*this, S
);
4584 /// Get the list of variables declared in the context of the untied tasks.
4585 class CheckVarsEscapingUntiedTaskDeclContext final
4586 : public ConstStmtVisitor
<CheckVarsEscapingUntiedTaskDeclContext
> {
4587 llvm::SmallVector
<const VarDecl
*, 4> PrivateDecls
;
4590 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4591 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4592 void VisitDeclStmt(const DeclStmt
*S
) {
4595 // Need to privatize only local vars, static locals can be processed as is.
4596 for (const Decl
*D
: S
->decls()) {
4597 if (const auto *VD
= dyn_cast_or_null
<VarDecl
>(D
))
4598 if (VD
->hasLocalStorage())
4599 PrivateDecls
.push_back(VD
);
4602 void VisitOMPExecutableDirective(const OMPExecutableDirective
*) {}
4603 void VisitCapturedStmt(const CapturedStmt
*) {}
4604 void VisitLambdaExpr(const LambdaExpr
*) {}
4605 void VisitBlockExpr(const BlockExpr
*) {}
4606 void VisitStmt(const Stmt
*S
) {
4609 for (const Stmt
*Child
: S
->children())
4614 /// Swaps list of vars with the provided one.
4615 ArrayRef
<const VarDecl
*> getPrivateDecls() const { return PrivateDecls
; }
4617 } // anonymous namespace
4619 static void buildDependences(const OMPExecutableDirective
&S
,
4620 OMPTaskDataTy
&Data
) {
4622 // First look for 'omp_all_memory' and add this first.
4623 bool OmpAllMemory
= false;
4625 S
.getClausesOfKind
<OMPDependClause
>(), [](const OMPDependClause
*C
) {
4626 return C
->getDependencyKind() == OMPC_DEPEND_outallmemory
||
4627 C
->getDependencyKind() == OMPC_DEPEND_inoutallmemory
;
4629 OmpAllMemory
= true;
4630 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4631 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4633 OMPTaskDataTy::DependData
&DD
=
4634 Data
.Dependences
.emplace_back(OMPC_DEPEND_outallmemory
,
4635 /*IteratorExpr=*/nullptr);
4636 // Add a nullptr Expr to simplify the codegen in emitDependData.
4637 DD
.DepExprs
.push_back(nullptr);
4639 // Add remaining dependences skipping any 'out' or 'inout' if they are
4640 // overridden by 'omp_all_memory'.
4641 for (const auto *C
: S
.getClausesOfKind
<OMPDependClause
>()) {
4642 OpenMPDependClauseKind Kind
= C
->getDependencyKind();
4643 if (Kind
== OMPC_DEPEND_outallmemory
|| Kind
== OMPC_DEPEND_inoutallmemory
)
4645 if (OmpAllMemory
&& (Kind
== OMPC_DEPEND_out
|| Kind
== OMPC_DEPEND_inout
))
4647 OMPTaskDataTy::DependData
&DD
=
4648 Data
.Dependences
.emplace_back(C
->getDependencyKind(), C
->getModifier());
4649 DD
.DepExprs
.append(C
->varlist_begin(), C
->varlist_end());
4653 void CodeGenFunction::EmitOMPTaskBasedDirective(
4654 const OMPExecutableDirective
&S
, const OpenMPDirectiveKind CapturedRegion
,
4655 const RegionCodeGenTy
&BodyGen
, const TaskGenTy
&TaskGen
,
4656 OMPTaskDataTy
&Data
) {
4657 // Emit outlined function for task construct.
4658 const CapturedStmt
*CS
= S
.getCapturedStmt(CapturedRegion
);
4659 auto I
= CS
->getCapturedDecl()->param_begin();
4660 auto PartId
= std::next(I
);
4661 auto TaskT
= std::next(I
, 4);
4662 // Check if the task is final
4663 if (const auto *Clause
= S
.getSingleClause
<OMPFinalClause
>()) {
4664 // If the condition constant folds and can be elided, try to avoid emitting
4665 // the condition and the dead arm of the if/else.
4666 const Expr
*Cond
= Clause
->getCondition();
4668 if (ConstantFoldsToSimpleInteger(Cond
, CondConstant
))
4669 Data
.Final
.setInt(CondConstant
);
4671 Data
.Final
.setPointer(EvaluateExprAsBool(Cond
));
4673 // By default the task is not final.
4674 Data
.Final
.setInt(/*IntVal=*/false);
4676 // Check if the task has 'priority' clause.
4677 if (const auto *Clause
= S
.getSingleClause
<OMPPriorityClause
>()) {
4678 const Expr
*Prio
= Clause
->getPriority();
4679 Data
.Priority
.setInt(/*IntVal=*/true);
4680 Data
.Priority
.setPointer(EmitScalarConversion(
4681 EmitScalarExpr(Prio
), Prio
->getType(),
4682 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4683 Prio
->getExprLoc()));
4685 // The first function argument for tasks is a thread id, the second one is a
4686 // part id (0 for tied tasks, >=0 for untied task).
4687 llvm::DenseSet
<const VarDecl
*> EmittedAsPrivate
;
4688 // Get list of private variables.
4689 for (const auto *C
: S
.getClausesOfKind
<OMPPrivateClause
>()) {
4690 auto IRef
= C
->varlist_begin();
4691 for (const Expr
*IInit
: C
->private_copies()) {
4692 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4693 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4694 Data
.PrivateVars
.push_back(*IRef
);
4695 Data
.PrivateCopies
.push_back(IInit
);
4700 EmittedAsPrivate
.clear();
4701 // Get list of firstprivate variables.
4702 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
4703 auto IRef
= C
->varlist_begin();
4704 auto IElemInitRef
= C
->inits().begin();
4705 for (const Expr
*IInit
: C
->private_copies()) {
4706 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4707 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4708 Data
.FirstprivateVars
.push_back(*IRef
);
4709 Data
.FirstprivateCopies
.push_back(IInit
);
4710 Data
.FirstprivateInits
.push_back(*IElemInitRef
);
4716 // Get list of lastprivate variables (for taskloops).
4717 llvm::MapVector
<const VarDecl
*, const DeclRefExpr
*> LastprivateDstsOrigs
;
4718 for (const auto *C
: S
.getClausesOfKind
<OMPLastprivateClause
>()) {
4719 auto IRef
= C
->varlist_begin();
4720 auto ID
= C
->destination_exprs().begin();
4721 for (const Expr
*IInit
: C
->private_copies()) {
4722 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4723 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4724 Data
.LastprivateVars
.push_back(*IRef
);
4725 Data
.LastprivateCopies
.push_back(IInit
);
4727 LastprivateDstsOrigs
.insert(
4728 std::make_pair(cast
<VarDecl
>(cast
<DeclRefExpr
>(*ID
)->getDecl()),
4729 cast
<DeclRefExpr
>(*IRef
)));
4734 SmallVector
<const Expr
*, 4> LHSs
;
4735 SmallVector
<const Expr
*, 4> RHSs
;
4736 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
4737 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
4738 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
4739 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
4740 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
4741 C
->reduction_ops().end());
4742 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
4743 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
4745 Data
.Reductions
= CGM
.getOpenMPRuntime().emitTaskReductionInit(
4746 *this, S
.getBeginLoc(), LHSs
, RHSs
, Data
);
4747 // Build list of dependences.
4748 buildDependences(S
, Data
);
4749 // Get list of local vars for untied tasks.
4751 CheckVarsEscapingUntiedTaskDeclContext Checker
;
4752 Checker
.Visit(S
.getInnermostCapturedStmt()->getCapturedStmt());
4753 Data
.PrivateLocals
.append(Checker
.getPrivateDecls().begin(),
4754 Checker
.getPrivateDecls().end());
4756 auto &&CodeGen
= [&Data
, &S
, CS
, &BodyGen
, &LastprivateDstsOrigs
,
4757 CapturedRegion
](CodeGenFunction
&CGF
,
4758 PrePostActionTy
&Action
) {
4759 llvm::MapVector
<CanonicalDeclPtr
<const VarDecl
>,
4760 std::pair
<Address
, Address
>>
4762 // Set proper addresses for generated private copies.
4763 OMPPrivateScope
Scope(CGF
);
4764 // Generate debug info for variables present in shared clause.
4765 if (auto *DI
= CGF
.getDebugInfo()) {
4766 llvm::SmallDenseMap
<const VarDecl
*, FieldDecl
*> CaptureFields
=
4767 CGF
.CapturedStmtInfo
->getCaptureFields();
4768 llvm::Value
*ContextValue
= CGF
.CapturedStmtInfo
->getContextValue();
4769 if (CaptureFields
.size() && ContextValue
) {
4770 unsigned CharWidth
= CGF
.getContext().getCharWidth();
4771 // The shared variables are packed together as members of structure.
4772 // So the address of each shared variable can be computed by adding
4773 // offset of it (within record) to the base address of record. For each
4774 // shared variable, debug intrinsic llvm.dbg.declare is generated with
4775 // appropriate expressions (DIExpression).
4777 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4778 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4780 // metadata !DIExpression(DW_OP_deref))
4781 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4783 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4784 for (auto It
= CaptureFields
.begin(); It
!= CaptureFields
.end(); ++It
) {
4785 const VarDecl
*SharedVar
= It
->first
;
4786 RecordDecl
*CaptureRecord
= It
->second
->getParent();
4787 const ASTRecordLayout
&Layout
=
4788 CGF
.getContext().getASTRecordLayout(CaptureRecord
);
4790 Layout
.getFieldOffset(It
->second
->getFieldIndex()) / CharWidth
;
4791 if (CGF
.CGM
.getCodeGenOpts().hasReducedDebugInfo())
4792 (void)DI
->EmitDeclareOfAutoVariable(SharedVar
, ContextValue
,
4793 CGF
.Builder
, false);
4794 // Get the call dbg.declare instruction we just created and update
4795 // its DIExpression to add offset to base address.
4796 auto UpdateExpr
= [](llvm::LLVMContext
&Ctx
, auto *Declare
,
4798 SmallVector
<uint64_t, 8> Ops
;
4799 // Add offset to the base address if non zero.
4801 Ops
.push_back(llvm::dwarf::DW_OP_plus_uconst
);
4802 Ops
.push_back(Offset
);
4804 Ops
.push_back(llvm::dwarf::DW_OP_deref
);
4805 Declare
->setExpression(llvm::DIExpression::get(Ctx
, Ops
));
4807 llvm::Instruction
&Last
= CGF
.Builder
.GetInsertBlock()->back();
4808 if (auto DDI
= dyn_cast
<llvm::DbgVariableIntrinsic
>(&Last
))
4809 UpdateExpr(DDI
->getContext(), DDI
, Offset
);
4810 // If we're emitting using the new debug info format into a block
4811 // without a terminator, the record will be "trailing".
4812 assert(!Last
.isTerminator() && "unexpected terminator");
4814 CGF
.Builder
.GetInsertBlock()->getTrailingDbgRecords()) {
4815 for (llvm::DbgVariableRecord
&DVR
: llvm::reverse(
4816 llvm::filterDbgVars(Marker
->getDbgRecordRange()))) {
4817 UpdateExpr(Last
.getContext(), &DVR
, Offset
);
4824 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> FirstprivatePtrs
;
4825 if (!Data
.PrivateVars
.empty() || !Data
.FirstprivateVars
.empty() ||
4826 !Data
.LastprivateVars
.empty() || !Data
.PrivateLocals
.empty()) {
4827 enum { PrivatesParam
= 2, CopyFnParam
= 3 };
4828 llvm::Value
*CopyFn
= CGF
.Builder
.CreateLoad(
4829 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(CopyFnParam
)));
4830 llvm::Value
*PrivatesPtr
= CGF
.Builder
.CreateLoad(CGF
.GetAddrOfLocalVar(
4831 CS
->getCapturedDecl()->getParam(PrivatesParam
)));
4833 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> PrivatePtrs
;
4834 llvm::SmallVector
<llvm::Value
*, 16> CallArgs
;
4835 llvm::SmallVector
<llvm::Type
*, 4> ParamTypes
;
4836 CallArgs
.push_back(PrivatesPtr
);
4837 ParamTypes
.push_back(PrivatesPtr
->getType());
4838 for (const Expr
*E
: Data
.PrivateVars
) {
4839 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4840 RawAddress PrivatePtr
= CGF
.CreateMemTemp(
4841 CGF
.getContext().getPointerType(E
->getType()), ".priv.ptr.addr");
4842 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4843 CallArgs
.push_back(PrivatePtr
.getPointer());
4844 ParamTypes
.push_back(PrivatePtr
.getType());
4846 for (const Expr
*E
: Data
.FirstprivateVars
) {
4847 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4848 RawAddress PrivatePtr
=
4849 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
4850 ".firstpriv.ptr.addr");
4851 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4852 FirstprivatePtrs
.emplace_back(VD
, PrivatePtr
);
4853 CallArgs
.push_back(PrivatePtr
.getPointer());
4854 ParamTypes
.push_back(PrivatePtr
.getType());
4856 for (const Expr
*E
: Data
.LastprivateVars
) {
4857 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4858 RawAddress PrivatePtr
=
4859 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
4860 ".lastpriv.ptr.addr");
4861 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4862 CallArgs
.push_back(PrivatePtr
.getPointer());
4863 ParamTypes
.push_back(PrivatePtr
.getType());
4865 for (const VarDecl
*VD
: Data
.PrivateLocals
) {
4866 QualType Ty
= VD
->getType().getNonReferenceType();
4867 if (VD
->getType()->isLValueReferenceType())
4868 Ty
= CGF
.getContext().getPointerType(Ty
);
4869 if (isAllocatableDecl(VD
))
4870 Ty
= CGF
.getContext().getPointerType(Ty
);
4871 RawAddress PrivatePtr
= CGF
.CreateMemTemp(
4872 CGF
.getContext().getPointerType(Ty
), ".local.ptr.addr");
4873 auto Result
= UntiedLocalVars
.insert(
4874 std::make_pair(VD
, std::make_pair(PrivatePtr
, Address::invalid())));
4875 // If key exists update in place.
4876 if (Result
.second
== false)
4877 *Result
.first
= std::make_pair(
4878 VD
, std::make_pair(PrivatePtr
, Address::invalid()));
4879 CallArgs
.push_back(PrivatePtr
.getPointer());
4880 ParamTypes
.push_back(PrivatePtr
.getType());
4882 auto *CopyFnTy
= llvm::FunctionType::get(CGF
.Builder
.getVoidTy(),
4883 ParamTypes
, /*isVarArg=*/false);
4884 CGF
.CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(
4885 CGF
, S
.getBeginLoc(), {CopyFnTy
, CopyFn
}, CallArgs
);
4886 for (const auto &Pair
: LastprivateDstsOrigs
) {
4887 const auto *OrigVD
= cast
<VarDecl
>(Pair
.second
->getDecl());
4888 DeclRefExpr
DRE(CGF
.getContext(), const_cast<VarDecl
*>(OrigVD
),
4889 /*RefersToEnclosingVariableOrCapture=*/
4890 CGF
.CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
4891 Pair
.second
->getType(), VK_LValue
,
4892 Pair
.second
->getExprLoc());
4893 Scope
.addPrivate(Pair
.first
, CGF
.EmitLValue(&DRE
).getAddress());
4895 for (const auto &Pair
: PrivatePtrs
) {
4896 Address Replacement
= Address(
4897 CGF
.Builder
.CreateLoad(Pair
.second
),
4898 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
4899 CGF
.getContext().getDeclAlign(Pair
.first
));
4900 Scope
.addPrivate(Pair
.first
, Replacement
);
4901 if (auto *DI
= CGF
.getDebugInfo())
4902 if (CGF
.CGM
.getCodeGenOpts().hasReducedDebugInfo())
4903 (void)DI
->EmitDeclareOfAutoVariable(
4904 Pair
.first
, Pair
.second
.getBasePointer(), CGF
.Builder
,
4905 /*UsePointerValue*/ true);
4907 // Adjust mapping for internal locals by mapping actual memory instead of
4908 // a pointer to this memory.
4909 for (auto &Pair
: UntiedLocalVars
) {
4910 QualType VDType
= Pair
.first
->getType().getNonReferenceType();
4911 if (Pair
.first
->getType()->isLValueReferenceType())
4912 VDType
= CGF
.getContext().getPointerType(VDType
);
4913 if (isAllocatableDecl(Pair
.first
)) {
4914 llvm::Value
*Ptr
= CGF
.Builder
.CreateLoad(Pair
.second
.first
);
4915 Address
Replacement(
4917 CGF
.ConvertTypeForMem(CGF
.getContext().getPointerType(VDType
)),
4918 CGF
.getPointerAlign());
4919 Pair
.second
.first
= Replacement
;
4920 Ptr
= CGF
.Builder
.CreateLoad(Replacement
);
4921 Replacement
= Address(Ptr
, CGF
.ConvertTypeForMem(VDType
),
4922 CGF
.getContext().getDeclAlign(Pair
.first
));
4923 Pair
.second
.second
= Replacement
;
4925 llvm::Value
*Ptr
= CGF
.Builder
.CreateLoad(Pair
.second
.first
);
4926 Address
Replacement(Ptr
, CGF
.ConvertTypeForMem(VDType
),
4927 CGF
.getContext().getDeclAlign(Pair
.first
));
4928 Pair
.second
.first
= Replacement
;
4932 if (Data
.Reductions
) {
4933 OMPPrivateScope
FirstprivateScope(CGF
);
4934 for (const auto &Pair
: FirstprivatePtrs
) {
4935 Address
Replacement(
4936 CGF
.Builder
.CreateLoad(Pair
.second
),
4937 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
4938 CGF
.getContext().getDeclAlign(Pair
.first
));
4939 FirstprivateScope
.addPrivate(Pair
.first
, Replacement
);
4941 (void)FirstprivateScope
.Privatize();
4942 OMPLexicalScope
LexScope(CGF
, S
, CapturedRegion
);
4943 ReductionCodeGen
RedCG(Data
.ReductionVars
, Data
.ReductionVars
,
4944 Data
.ReductionCopies
, Data
.ReductionOps
);
4945 llvm::Value
*ReductionsPtr
= CGF
.Builder
.CreateLoad(
4946 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(9)));
4947 for (unsigned Cnt
= 0, E
= Data
.ReductionVars
.size(); Cnt
< E
; ++Cnt
) {
4948 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
4949 RedCG
.emitAggregateType(CGF
, Cnt
);
4950 // FIXME: This must removed once the runtime library is fixed.
4951 // Emit required threadprivate variables for
4952 // initializer/combiner/finalizer.
4953 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
4955 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
4956 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
4957 Replacement
= Address(
4958 CGF
.EmitScalarConversion(Replacement
.emitRawPointer(CGF
),
4959 CGF
.getContext().VoidPtrTy
,
4960 CGF
.getContext().getPointerType(
4961 Data
.ReductionCopies
[Cnt
]->getType()),
4962 Data
.ReductionCopies
[Cnt
]->getExprLoc()),
4963 CGF
.ConvertTypeForMem(Data
.ReductionCopies
[Cnt
]->getType()),
4964 Replacement
.getAlignment());
4965 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
4966 Scope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
4969 // Privatize all private variables except for in_reduction items.
4970 (void)Scope
.Privatize();
4971 SmallVector
<const Expr
*, 4> InRedVars
;
4972 SmallVector
<const Expr
*, 4> InRedPrivs
;
4973 SmallVector
<const Expr
*, 4> InRedOps
;
4974 SmallVector
<const Expr
*, 4> TaskgroupDescriptors
;
4975 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
4976 auto IPriv
= C
->privates().begin();
4977 auto IRed
= C
->reduction_ops().begin();
4978 auto ITD
= C
->taskgroup_descriptors().begin();
4979 for (const Expr
*Ref
: C
->varlists()) {
4980 InRedVars
.emplace_back(Ref
);
4981 InRedPrivs
.emplace_back(*IPriv
);
4982 InRedOps
.emplace_back(*IRed
);
4983 TaskgroupDescriptors
.emplace_back(*ITD
);
4984 std::advance(IPriv
, 1);
4985 std::advance(IRed
, 1);
4986 std::advance(ITD
, 1);
4989 // Privatize in_reduction items here, because taskgroup descriptors must be
4990 // privatized earlier.
4991 OMPPrivateScope
InRedScope(CGF
);
4992 if (!InRedVars
.empty()) {
4993 ReductionCodeGen
RedCG(InRedVars
, InRedVars
, InRedPrivs
, InRedOps
);
4994 for (unsigned Cnt
= 0, E
= InRedVars
.size(); Cnt
< E
; ++Cnt
) {
4995 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
4996 RedCG
.emitAggregateType(CGF
, Cnt
);
4997 // The taskgroup descriptor variable is always implicit firstprivate and
4998 // privatized already during processing of the firstprivates.
4999 // FIXME: This must removed once the runtime library is fixed.
5000 // Emit required threadprivate variables for
5001 // initializer/combiner/finalizer.
5002 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
5004 llvm::Value
*ReductionsPtr
;
5005 if (const Expr
*TRExpr
= TaskgroupDescriptors
[Cnt
]) {
5006 ReductionsPtr
= CGF
.EmitLoadOfScalar(CGF
.EmitLValue(TRExpr
),
5007 TRExpr
->getExprLoc());
5009 ReductionsPtr
= llvm::ConstantPointerNull::get(CGF
.VoidPtrTy
);
5011 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5012 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
5013 Replacement
= Address(
5014 CGF
.EmitScalarConversion(
5015 Replacement
.emitRawPointer(CGF
), CGF
.getContext().VoidPtrTy
,
5016 CGF
.getContext().getPointerType(InRedPrivs
[Cnt
]->getType()),
5017 InRedPrivs
[Cnt
]->getExprLoc()),
5018 CGF
.ConvertTypeForMem(InRedPrivs
[Cnt
]->getType()),
5019 Replacement
.getAlignment());
5020 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5021 InRedScope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5024 (void)InRedScope
.Privatize();
5026 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII
LocalVarsScope(CGF
,
5031 llvm::Function
*OutlinedFn
= CGM
.getOpenMPRuntime().emitTaskOutlinedFunction(
5032 S
, *I
, *PartId
, *TaskT
, S
.getDirectiveKind(), CodeGen
, Data
.Tied
,
5033 Data
.NumberOfParts
);
5034 OMPLexicalScope
Scope(*this, S
, std::nullopt
,
5035 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
5036 !isOpenMPSimdDirective(S
.getDirectiveKind()));
5037 TaskGen(*this, OutlinedFn
, Data
);
5040 static ImplicitParamDecl
*
5041 createImplicitFirstprivateForType(ASTContext
&C
, OMPTaskDataTy
&Data
,
5042 QualType Ty
, CapturedDecl
*CD
,
5043 SourceLocation Loc
) {
5044 auto *OrigVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, Ty
,
5045 ImplicitParamKind::Other
);
5046 auto *OrigRef
= DeclRefExpr::Create(
5047 C
, NestedNameSpecifierLoc(), SourceLocation(), OrigVD
,
5048 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, Ty
, VK_LValue
);
5049 auto *PrivateVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, Ty
,
5050 ImplicitParamKind::Other
);
5051 auto *PrivateRef
= DeclRefExpr::Create(
5052 C
, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD
,
5053 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, Ty
, VK_LValue
);
5054 QualType ElemType
= C
.getBaseElementType(Ty
);
5055 auto *InitVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, ElemType
,
5056 ImplicitParamKind::Other
);
5057 auto *InitRef
= DeclRefExpr::Create(
5058 C
, NestedNameSpecifierLoc(), SourceLocation(), InitVD
,
5059 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, ElemType
, VK_LValue
);
5060 PrivateVD
->setInitStyle(VarDecl::CInit
);
5061 PrivateVD
->setInit(ImplicitCastExpr::Create(C
, ElemType
, CK_LValueToRValue
,
5062 InitRef
, /*BasePath=*/nullptr,
5063 VK_PRValue
, FPOptionsOverride()));
5064 Data
.FirstprivateVars
.emplace_back(OrigRef
);
5065 Data
.FirstprivateCopies
.emplace_back(PrivateRef
);
5066 Data
.FirstprivateInits
.emplace_back(InitRef
);
5070 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5071 const OMPExecutableDirective
&S
, const RegionCodeGenTy
&BodyGen
,
5072 OMPTargetDataInfo
&InputInfo
) {
5073 // Emit outlined function for task construct.
5074 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_task
);
5075 Address CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
5076 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
5077 auto I
= CS
->getCapturedDecl()->param_begin();
5078 auto PartId
= std::next(I
);
5079 auto TaskT
= std::next(I
, 4);
5081 // The task is not final.
5082 Data
.Final
.setInt(/*IntVal=*/false);
5083 // Get list of firstprivate variables.
5084 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
5085 auto IRef
= C
->varlist_begin();
5086 auto IElemInitRef
= C
->inits().begin();
5087 for (auto *IInit
: C
->private_copies()) {
5088 Data
.FirstprivateVars
.push_back(*IRef
);
5089 Data
.FirstprivateCopies
.push_back(IInit
);
5090 Data
.FirstprivateInits
.push_back(*IElemInitRef
);
5095 SmallVector
<const Expr
*, 4> LHSs
;
5096 SmallVector
<const Expr
*, 4> RHSs
;
5097 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
5098 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
5099 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
5100 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
5101 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
5102 C
->reduction_ops().end());
5103 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5104 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5106 OMPPrivateScope
TargetScope(*this);
5107 VarDecl
*BPVD
= nullptr;
5108 VarDecl
*PVD
= nullptr;
5109 VarDecl
*SVD
= nullptr;
5110 VarDecl
*MVD
= nullptr;
5111 if (InputInfo
.NumberOfTargetItems
> 0) {
5112 auto *CD
= CapturedDecl::Create(
5113 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5114 llvm::APInt
ArrSize(/*numBits=*/32, InputInfo
.NumberOfTargetItems
);
5115 QualType BaseAndPointerAndMapperType
= getContext().getConstantArrayType(
5116 getContext().VoidPtrTy
, ArrSize
, nullptr, ArraySizeModifier::Normal
,
5117 /*IndexTypeQuals=*/0);
5118 BPVD
= createImplicitFirstprivateForType(
5119 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5120 PVD
= createImplicitFirstprivateForType(
5121 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5122 QualType SizesType
= getContext().getConstantArrayType(
5123 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5124 ArrSize
, nullptr, ArraySizeModifier::Normal
,
5125 /*IndexTypeQuals=*/0);
5126 SVD
= createImplicitFirstprivateForType(getContext(), Data
, SizesType
, CD
,
5128 TargetScope
.addPrivate(BPVD
, InputInfo
.BasePointersArray
);
5129 TargetScope
.addPrivate(PVD
, InputInfo
.PointersArray
);
5130 TargetScope
.addPrivate(SVD
, InputInfo
.SizesArray
);
5131 // If there is no user-defined mapper, the mapper array will be nullptr. In
5132 // this case, we don't need to privatize it.
5133 if (!isa_and_nonnull
<llvm::ConstantPointerNull
>(
5134 InputInfo
.MappersArray
.emitRawPointer(*this))) {
5135 MVD
= createImplicitFirstprivateForType(
5136 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5137 TargetScope
.addPrivate(MVD
, InputInfo
.MappersArray
);
5140 (void)TargetScope
.Privatize();
5141 buildDependences(S
, Data
);
5142 auto &&CodeGen
= [&Data
, &S
, CS
, &BodyGen
, BPVD
, PVD
, SVD
, MVD
,
5143 &InputInfo
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
5144 // Set proper addresses for generated private copies.
5145 OMPPrivateScope
Scope(CGF
);
5146 if (!Data
.FirstprivateVars
.empty()) {
5147 enum { PrivatesParam
= 2, CopyFnParam
= 3 };
5148 llvm::Value
*CopyFn
= CGF
.Builder
.CreateLoad(
5149 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(CopyFnParam
)));
5150 llvm::Value
*PrivatesPtr
= CGF
.Builder
.CreateLoad(CGF
.GetAddrOfLocalVar(
5151 CS
->getCapturedDecl()->getParam(PrivatesParam
)));
5153 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> PrivatePtrs
;
5154 llvm::SmallVector
<llvm::Value
*, 16> CallArgs
;
5155 llvm::SmallVector
<llvm::Type
*, 4> ParamTypes
;
5156 CallArgs
.push_back(PrivatesPtr
);
5157 ParamTypes
.push_back(PrivatesPtr
->getType());
5158 for (const Expr
*E
: Data
.FirstprivateVars
) {
5159 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
5160 RawAddress PrivatePtr
=
5161 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
5162 ".firstpriv.ptr.addr");
5163 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
5164 CallArgs
.push_back(PrivatePtr
.getPointer());
5165 ParamTypes
.push_back(PrivatePtr
.getType());
5167 auto *CopyFnTy
= llvm::FunctionType::get(CGF
.Builder
.getVoidTy(),
5168 ParamTypes
, /*isVarArg=*/false);
5169 CGF
.CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(
5170 CGF
, S
.getBeginLoc(), {CopyFnTy
, CopyFn
}, CallArgs
);
5171 for (const auto &Pair
: PrivatePtrs
) {
5172 Address
Replacement(
5173 CGF
.Builder
.CreateLoad(Pair
.second
),
5174 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
5175 CGF
.getContext().getDeclAlign(Pair
.first
));
5176 Scope
.addPrivate(Pair
.first
, Replacement
);
5179 CGF
.processInReduction(S
, Data
, CGF
, CS
, Scope
);
5180 if (InputInfo
.NumberOfTargetItems
> 0) {
5181 InputInfo
.BasePointersArray
= CGF
.Builder
.CreateConstArrayGEP(
5182 CGF
.GetAddrOfLocalVar(BPVD
), /*Index=*/0);
5183 InputInfo
.PointersArray
= CGF
.Builder
.CreateConstArrayGEP(
5184 CGF
.GetAddrOfLocalVar(PVD
), /*Index=*/0);
5185 InputInfo
.SizesArray
= CGF
.Builder
.CreateConstArrayGEP(
5186 CGF
.GetAddrOfLocalVar(SVD
), /*Index=*/0);
5187 // If MVD is nullptr, the mapper array is not privatized
5189 InputInfo
.MappersArray
= CGF
.Builder
.CreateConstArrayGEP(
5190 CGF
.GetAddrOfLocalVar(MVD
), /*Index=*/0);
5194 OMPLexicalScope
LexScope(CGF
, S
, OMPD_task
, /*EmitPreInitStmt=*/false);
5195 auto *TL
= S
.getSingleClause
<OMPThreadLimitClause
>();
5196 if (CGF
.CGM
.getLangOpts().OpenMP
>= 51 &&
5197 needsTaskBasedThreadLimit(S
.getDirectiveKind()) && TL
) {
5198 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
5199 // enclosing this target region. This will indirectly set the thread_limit
5200 // for every applicable construct within target region.
5201 CGF
.CGM
.getOpenMPRuntime().emitThreadLimitClause(
5202 CGF
, TL
->getThreadLimit(), S
.getBeginLoc());
5206 llvm::Function
*OutlinedFn
= CGM
.getOpenMPRuntime().emitTaskOutlinedFunction(
5207 S
, *I
, *PartId
, *TaskT
, S
.getDirectiveKind(), CodeGen
, /*Tied=*/true,
5208 Data
.NumberOfParts
);
5209 llvm::APInt
TrueOrFalse(32, S
.hasClausesOfKind
<OMPNowaitClause
>() ? 1 : 0);
5210 IntegerLiteral
IfCond(getContext(), TrueOrFalse
,
5211 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5213 CGM
.getOpenMPRuntime().emitTaskCall(*this, S
.getBeginLoc(), S
, OutlinedFn
,
5214 SharedsTy
, CapturedStruct
, &IfCond
, Data
);
5217 void CodeGenFunction::processInReduction(const OMPExecutableDirective
&S
,
5218 OMPTaskDataTy
&Data
,
5219 CodeGenFunction
&CGF
,
5220 const CapturedStmt
*CS
,
5221 OMPPrivateScope
&Scope
) {
5222 if (Data
.Reductions
) {
5223 OpenMPDirectiveKind CapturedRegion
= S
.getDirectiveKind();
5224 OMPLexicalScope
LexScope(CGF
, S
, CapturedRegion
);
5225 ReductionCodeGen
RedCG(Data
.ReductionVars
, Data
.ReductionVars
,
5226 Data
.ReductionCopies
, Data
.ReductionOps
);
5227 llvm::Value
*ReductionsPtr
= CGF
.Builder
.CreateLoad(
5228 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(4)));
5229 for (unsigned Cnt
= 0, E
= Data
.ReductionVars
.size(); Cnt
< E
; ++Cnt
) {
5230 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
5231 RedCG
.emitAggregateType(CGF
, Cnt
);
5232 // FIXME: This must removed once the runtime library is fixed.
5233 // Emit required threadprivate variables for
5234 // initializer/combiner/finalizer.
5235 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
5237 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5238 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
5239 Replacement
= Address(
5240 CGF
.EmitScalarConversion(Replacement
.emitRawPointer(CGF
),
5241 CGF
.getContext().VoidPtrTy
,
5242 CGF
.getContext().getPointerType(
5243 Data
.ReductionCopies
[Cnt
]->getType()),
5244 Data
.ReductionCopies
[Cnt
]->getExprLoc()),
5245 CGF
.ConvertTypeForMem(Data
.ReductionCopies
[Cnt
]->getType()),
5246 Replacement
.getAlignment());
5247 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5248 Scope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5251 (void)Scope
.Privatize();
5252 SmallVector
<const Expr
*, 4> InRedVars
;
5253 SmallVector
<const Expr
*, 4> InRedPrivs
;
5254 SmallVector
<const Expr
*, 4> InRedOps
;
5255 SmallVector
<const Expr
*, 4> TaskgroupDescriptors
;
5256 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
5257 auto IPriv
= C
->privates().begin();
5258 auto IRed
= C
->reduction_ops().begin();
5259 auto ITD
= C
->taskgroup_descriptors().begin();
5260 for (const Expr
*Ref
: C
->varlists()) {
5261 InRedVars
.emplace_back(Ref
);
5262 InRedPrivs
.emplace_back(*IPriv
);
5263 InRedOps
.emplace_back(*IRed
);
5264 TaskgroupDescriptors
.emplace_back(*ITD
);
5265 std::advance(IPriv
, 1);
5266 std::advance(IRed
, 1);
5267 std::advance(ITD
, 1);
5270 OMPPrivateScope
InRedScope(CGF
);
5271 if (!InRedVars
.empty()) {
5272 ReductionCodeGen
RedCG(InRedVars
, InRedVars
, InRedPrivs
, InRedOps
);
5273 for (unsigned Cnt
= 0, E
= InRedVars
.size(); Cnt
< E
; ++Cnt
) {
5274 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
5275 RedCG
.emitAggregateType(CGF
, Cnt
);
5276 // FIXME: This must removed once the runtime library is fixed.
5277 // Emit required threadprivate variables for
5278 // initializer/combiner/finalizer.
5279 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
5281 llvm::Value
*ReductionsPtr
;
5282 if (const Expr
*TRExpr
= TaskgroupDescriptors
[Cnt
]) {
5284 CGF
.EmitLoadOfScalar(CGF
.EmitLValue(TRExpr
), TRExpr
->getExprLoc());
5286 ReductionsPtr
= llvm::ConstantPointerNull::get(CGF
.VoidPtrTy
);
5288 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5289 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
5290 Replacement
= Address(
5291 CGF
.EmitScalarConversion(
5292 Replacement
.emitRawPointer(CGF
), CGF
.getContext().VoidPtrTy
,
5293 CGF
.getContext().getPointerType(InRedPrivs
[Cnt
]->getType()),
5294 InRedPrivs
[Cnt
]->getExprLoc()),
5295 CGF
.ConvertTypeForMem(InRedPrivs
[Cnt
]->getType()),
5296 Replacement
.getAlignment());
5297 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5298 InRedScope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5301 (void)InRedScope
.Privatize();
5304 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective
&S
) {
5305 // Emit outlined function for task construct.
5306 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_task
);
5307 Address CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
5308 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
5309 const Expr
*IfCond
= nullptr;
5310 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
5311 if (C
->getNameModifier() == OMPD_unknown
||
5312 C
->getNameModifier() == OMPD_task
) {
5313 IfCond
= C
->getCondition();
5319 // Check if we should emit tied or untied task.
5320 Data
.Tied
= !S
.getSingleClause
<OMPUntiedClause
>();
5321 auto &&BodyGen
= [CS
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5322 CGF
.EmitStmt(CS
->getCapturedStmt());
5324 auto &&TaskGen
= [&S
, SharedsTy
, CapturedStruct
,
5325 IfCond
](CodeGenFunction
&CGF
, llvm::Function
*OutlinedFn
,
5326 const OMPTaskDataTy
&Data
) {
5327 CGF
.CGM
.getOpenMPRuntime().emitTaskCall(CGF
, S
.getBeginLoc(), S
, OutlinedFn
,
5328 SharedsTy
, CapturedStruct
, IfCond
,
5332 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
5333 EmitOMPTaskBasedDirective(S
, OMPD_task
, BodyGen
, TaskGen
, Data
);
5336 void CodeGenFunction::EmitOMPTaskyieldDirective(
5337 const OMPTaskyieldDirective
&S
) {
5338 CGM
.getOpenMPRuntime().emitTaskyieldCall(*this, S
.getBeginLoc());
5341 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective
&S
) {
5342 const OMPMessageClause
*MC
= S
.getSingleClause
<OMPMessageClause
>();
5343 Expr
*ME
= MC
? MC
->getMessageString() : nullptr;
5344 const OMPSeverityClause
*SC
= S
.getSingleClause
<OMPSeverityClause
>();
5345 bool IsFatal
= false;
5346 if (!SC
|| SC
->getSeverityKind() == OMPC_SEVERITY_fatal
)
5348 CGM
.getOpenMPRuntime().emitErrorCall(*this, S
.getBeginLoc(), ME
, IsFatal
);
5351 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective
&S
) {
5352 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_barrier
);
5355 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective
&S
) {
5357 // Build list of dependences
5358 buildDependences(S
, Data
);
5359 Data
.HasNowaitClause
= S
.hasClausesOfKind
<OMPNowaitClause
>();
5360 CGM
.getOpenMPRuntime().emitTaskwaitCall(*this, S
.getBeginLoc(), Data
);
5363 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective
&T
) {
5364 return T
.clauses().empty();
5367 void CodeGenFunction::EmitOMPTaskgroupDirective(
5368 const OMPTaskgroupDirective
&S
) {
5369 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5370 if (CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
)) {
5371 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
5372 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
5373 InsertPointTy
AllocaIP(AllocaInsertPt
->getParent(),
5374 AllocaInsertPt
->getIterator());
5376 auto BodyGenCB
= [&, this](InsertPointTy AllocaIP
,
5377 InsertPointTy CodeGenIP
) {
5378 Builder
.restoreIP(CodeGenIP
);
5379 EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
5381 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo
;
5382 if (!CapturedStmtInfo
)
5383 CapturedStmtInfo
= &CapStmtInfo
;
5384 Builder
.restoreIP(OMPBuilder
.createTaskgroup(Builder
, AllocaIP
, BodyGenCB
));
5387 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
5389 if (const Expr
*E
= S
.getReductionRef()) {
5390 SmallVector
<const Expr
*, 4> LHSs
;
5391 SmallVector
<const Expr
*, 4> RHSs
;
5393 for (const auto *C
: S
.getClausesOfKind
<OMPTaskReductionClause
>()) {
5394 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
5395 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
5396 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
5397 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
5398 C
->reduction_ops().end());
5399 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5400 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5402 llvm::Value
*ReductionDesc
=
5403 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionInit(CGF
, S
.getBeginLoc(),
5405 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
5406 CGF
.EmitVarDecl(*VD
);
5407 CGF
.EmitStoreOfScalar(ReductionDesc
, CGF
.GetAddrOfLocalVar(VD
),
5408 /*Volatile=*/false, E
->getType());
5410 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
5412 CGM
.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen
, S
.getBeginLoc());
5415 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective
&S
) {
5416 llvm::AtomicOrdering AO
= S
.getSingleClause
<OMPFlushClause
>()
5417 ? llvm::AtomicOrdering::NotAtomic
5418 : llvm::AtomicOrdering::AcquireRelease
;
5419 CGM
.getOpenMPRuntime().emitFlush(
5421 [&S
]() -> ArrayRef
<const Expr
*> {
5422 if (const auto *FlushClause
= S
.getSingleClause
<OMPFlushClause
>())
5423 return llvm::ArrayRef(FlushClause
->varlist_begin(),
5424 FlushClause
->varlist_end());
5425 return std::nullopt
;
5427 S
.getBeginLoc(), AO
);
5430 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective
&S
) {
5431 const auto *DO
= S
.getSingleClause
<OMPDepobjClause
>();
5432 LValue DOLVal
= EmitLValue(DO
->getDepobj());
5433 if (const auto *DC
= S
.getSingleClause
<OMPDependClause
>()) {
5434 OMPTaskDataTy::DependData
Dependencies(DC
->getDependencyKind(),
5436 Dependencies
.DepExprs
.append(DC
->varlist_begin(), DC
->varlist_end());
5437 Address DepAddr
= CGM
.getOpenMPRuntime().emitDepobjDependClause(
5438 *this, Dependencies
, DC
->getBeginLoc());
5439 EmitStoreOfScalar(DepAddr
.emitRawPointer(*this), DOLVal
);
5442 if (const auto *DC
= S
.getSingleClause
<OMPDestroyClause
>()) {
5443 CGM
.getOpenMPRuntime().emitDestroyClause(*this, DOLVal
, DC
->getBeginLoc());
5446 if (const auto *UC
= S
.getSingleClause
<OMPUpdateClause
>()) {
5447 CGM
.getOpenMPRuntime().emitUpdateClause(
5448 *this, DOLVal
, UC
->getDependencyKind(), UC
->getBeginLoc());
5453 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective
&S
) {
5454 if (!OMPParentLoopDirectiveForScan
)
5456 const OMPExecutableDirective
&ParentDir
= *OMPParentLoopDirectiveForScan
;
5457 bool IsInclusive
= S
.hasClausesOfKind
<OMPInclusiveClause
>();
5458 SmallVector
<const Expr
*, 4> Shareds
;
5459 SmallVector
<const Expr
*, 4> Privates
;
5460 SmallVector
<const Expr
*, 4> LHSs
;
5461 SmallVector
<const Expr
*, 4> RHSs
;
5462 SmallVector
<const Expr
*, 4> ReductionOps
;
5463 SmallVector
<const Expr
*, 4> CopyOps
;
5464 SmallVector
<const Expr
*, 4> CopyArrayTemps
;
5465 SmallVector
<const Expr
*, 4> CopyArrayElems
;
5466 for (const auto *C
: ParentDir
.getClausesOfKind
<OMPReductionClause
>()) {
5467 if (C
->getModifier() != OMPC_REDUCTION_inscan
)
5469 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
5470 Privates
.append(C
->privates().begin(), C
->privates().end());
5471 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5472 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5473 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
5474 CopyOps
.append(C
->copy_ops().begin(), C
->copy_ops().end());
5475 CopyArrayTemps
.append(C
->copy_array_temps().begin(),
5476 C
->copy_array_temps().end());
5477 CopyArrayElems
.append(C
->copy_array_elems().begin(),
5478 C
->copy_array_elems().end());
5480 if (ParentDir
.getDirectiveKind() == OMPD_simd
||
5481 (getLangOpts().OpenMPSimd
&&
5482 isOpenMPSimdDirective(ParentDir
.getDirectiveKind()))) {
5483 // For simd directive and simd-based directives in simd only mode, use the
5484 // following codegen:
5486 // #pragma omp simd reduction(inscan, +: x)
5489 // #pragma omp scan inclusive(x)
5492 // is transformed to:
5503 // #pragma omp simd reduction(inscan, +: x)
5506 // #pragma omp scan exclusive(x)
5519 llvm::BasicBlock
*OMPScanReduce
= createBasicBlock("omp.inscan.reduce");
5520 EmitBranch(IsInclusive
5522 : BreakContinueStack
.back().ContinueBlock
.getBlock());
5523 EmitBlock(OMPScanDispatch
);
5525 // New scope for correct construction/destruction of temp variables for
5527 LexicalScope
Scope(*this, S
.getSourceRange());
5528 EmitBranch(IsInclusive
? OMPBeforeScanBlock
: OMPAfterScanBlock
);
5529 EmitBlock(OMPScanReduce
);
5531 // Create temp var and copy LHS value to this temp value.
5533 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5534 const Expr
*PrivateExpr
= Privates
[I
];
5535 const Expr
*TempExpr
= CopyArrayTemps
[I
];
5537 *cast
<VarDecl
>(cast
<DeclRefExpr
>(TempExpr
)->getDecl()));
5538 LValue DestLVal
= EmitLValue(TempExpr
);
5539 LValue SrcLVal
= EmitLValue(LHSs
[I
]);
5540 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(),
5541 SrcLVal
.getAddress(),
5542 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5543 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5547 CGM
.getOpenMPRuntime().emitReduction(
5548 *this, ParentDir
.getEndLoc(), Privates
, LHSs
, RHSs
, ReductionOps
,
5549 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd
});
5550 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5551 const Expr
*PrivateExpr
= Privates
[I
];
5555 DestLVal
= EmitLValue(RHSs
[I
]);
5556 SrcLVal
= EmitLValue(LHSs
[I
]);
5558 const Expr
*TempExpr
= CopyArrayTemps
[I
];
5559 DestLVal
= EmitLValue(RHSs
[I
]);
5560 SrcLVal
= EmitLValue(TempExpr
);
5563 PrivateExpr
->getType(), DestLVal
.getAddress(), SrcLVal
.getAddress(),
5564 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5565 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()), CopyOps
[I
]);
5568 EmitBranch(IsInclusive
? OMPAfterScanBlock
: OMPBeforeScanBlock
);
5569 OMPScanExitBlock
= IsInclusive
5570 ? BreakContinueStack
.back().ContinueBlock
.getBlock()
5572 EmitBlock(OMPAfterScanBlock
);
5576 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5577 EmitBlock(OMPScanExitBlock
);
5579 if (OMPFirstScanLoop
) {
5580 // Emit buffer[i] = red; at the end of the input phase.
5581 const auto *IVExpr
= cast
<OMPLoopDirective
>(ParentDir
)
5582 .getIterationVariable()
5583 ->IgnoreParenImpCasts();
5584 LValue IdxLVal
= EmitLValue(IVExpr
);
5585 llvm::Value
*IdxVal
= EmitLoadOfScalar(IdxLVal
, IVExpr
->getExprLoc());
5586 IdxVal
= Builder
.CreateIntCast(IdxVal
, SizeTy
, /*isSigned=*/false);
5587 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5588 const Expr
*PrivateExpr
= Privates
[I
];
5589 const Expr
*OrigExpr
= Shareds
[I
];
5590 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
5591 OpaqueValueMapping
IdxMapping(
5593 cast
<OpaqueValueExpr
>(
5594 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
5595 RValue::get(IdxVal
));
5596 LValue DestLVal
= EmitLValue(CopyArrayElem
);
5597 LValue SrcLVal
= EmitLValue(OrigExpr
);
5599 PrivateExpr
->getType(), DestLVal
.getAddress(), SrcLVal
.getAddress(),
5600 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5601 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()), CopyOps
[I
]);
5604 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5606 EmitBlock(OMPScanExitBlock
);
5607 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5609 EmitBlock(OMPScanDispatch
);
5610 if (!OMPFirstScanLoop
) {
5611 // Emit red = buffer[i]; at the entrance to the scan phase.
5612 const auto *IVExpr
= cast
<OMPLoopDirective
>(ParentDir
)
5613 .getIterationVariable()
5614 ->IgnoreParenImpCasts();
5615 LValue IdxLVal
= EmitLValue(IVExpr
);
5616 llvm::Value
*IdxVal
= EmitLoadOfScalar(IdxLVal
, IVExpr
->getExprLoc());
5617 IdxVal
= Builder
.CreateIntCast(IdxVal
, SizeTy
, /*isSigned=*/false);
5618 llvm::BasicBlock
*ExclusiveExitBB
= nullptr;
5620 llvm::BasicBlock
*ContBB
= createBasicBlock("omp.exclusive.dec");
5621 ExclusiveExitBB
= createBasicBlock("omp.exclusive.copy.exit");
5622 llvm::Value
*Cmp
= Builder
.CreateIsNull(IdxVal
);
5623 Builder
.CreateCondBr(Cmp
, ExclusiveExitBB
, ContBB
);
5625 // Use idx - 1 iteration for exclusive scan.
5626 IdxVal
= Builder
.CreateNUWSub(IdxVal
, llvm::ConstantInt::get(SizeTy
, 1));
5628 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5629 const Expr
*PrivateExpr
= Privates
[I
];
5630 const Expr
*OrigExpr
= Shareds
[I
];
5631 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
5632 OpaqueValueMapping
IdxMapping(
5634 cast
<OpaqueValueExpr
>(
5635 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
5636 RValue::get(IdxVal
));
5637 LValue SrcLVal
= EmitLValue(CopyArrayElem
);
5638 LValue DestLVal
= EmitLValue(OrigExpr
);
5640 PrivateExpr
->getType(), DestLVal
.getAddress(), SrcLVal
.getAddress(),
5641 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5642 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()), CopyOps
[I
]);
5645 EmitBlock(ExclusiveExitBB
);
5648 EmitBranch((OMPFirstScanLoop
== IsInclusive
) ? OMPBeforeScanBlock
5649 : OMPAfterScanBlock
);
5650 EmitBlock(OMPAfterScanBlock
);
5653 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective
&S
,
5654 const CodeGenLoopTy
&CodeGenLoop
,
5656 // Emit the loop iteration variable.
5657 const auto *IVExpr
= cast
<DeclRefExpr
>(S
.getIterationVariable());
5658 const auto *IVDecl
= cast
<VarDecl
>(IVExpr
->getDecl());
5659 EmitVarDecl(*IVDecl
);
5661 // Emit the iterations count variable.
5662 // If it is not a variable, Sema decided to calculate iterations count on each
5663 // iteration (e.g., it is foldable into a constant).
5664 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
5665 EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
5666 // Emit calculation of the iterations count.
5667 EmitIgnoredExpr(S
.getCalcLastIteration());
5670 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
5672 bool HasLastprivateClause
= false;
5673 // Check pre-condition.
5675 OMPLoopScope
PreInitScope(*this, S
);
5676 // Skip the entire loop if we don't meet the precondition.
5677 // If the condition constant folds and can be elided, avoid emitting the
5680 llvm::BasicBlock
*ContBlock
= nullptr;
5681 if (ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
5685 llvm::BasicBlock
*ThenBlock
= createBasicBlock("omp.precond.then");
5686 ContBlock
= createBasicBlock("omp.precond.end");
5687 emitPreCond(*this, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
5688 getProfileCount(&S
));
5689 EmitBlock(ThenBlock
);
5690 incrementProfileCounter(&S
);
5693 emitAlignedClause(*this, S
);
5694 // Emit 'then' code.
5696 // Emit helper vars inits.
5698 LValue LB
= EmitOMPHelperVar(
5699 *this, cast
<DeclRefExpr
>(
5700 (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5701 ? S
.getCombinedLowerBoundVariable()
5702 : S
.getLowerBoundVariable())));
5703 LValue UB
= EmitOMPHelperVar(
5704 *this, cast
<DeclRefExpr
>(
5705 (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5706 ? S
.getCombinedUpperBoundVariable()
5707 : S
.getUpperBoundVariable())));
5709 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getStrideVariable()));
5711 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()));
5713 OMPPrivateScope
LoopScope(*this);
5714 if (EmitOMPFirstprivateClause(S
, LoopScope
)) {
5715 // Emit implicit barrier to synchronize threads and avoid data races
      // on initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGM.getOpenMPRuntime().emitBarrierCall(
          *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    EmitOMPPrivateClause(S, LoopScope);
    if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
        !isOpenMPTeamsDirective(S.getDirectiveKind()))
      EmitOMPReductionClauseInit(S, LoopScope);
    HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
    EmitOMPPrivateLoopCounters(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

    // Detect the distribute schedule kind and chunk.
    llvm::Value *Chunk = nullptr;
    OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
    if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
      ScheduleKind = C->getDistScheduleKind();
      if (const Expr *Ch = C->getChunkSize()) {
        Chunk = EmitScalarExpr(Ch);
        Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
      }
    } else {
      // Default behaviour for dist_schedule clause.
      CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
          *this, S, ScheduleKind, Chunk);
    }
    const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
    const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

    // OpenMP [2.10.8, distribute Construct, Description]
    // If dist_schedule is specified, kind must be static. If specified,
    // iterations are divided into chunks of size chunk_size, chunks are
    // assigned to the teams of the league in a round-robin fashion in the
    // order of the team number. When no chunk_size is specified, the
    // iteration space is divided into chunks that are approximately equal
    // in size, and at most one chunk is distributed to each team of the
    // league. The size of the chunks is unspecified in this case.
    bool StaticChunked =
        RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
        isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
    if (RT.isStaticNonchunked(ScheduleKind,
                              /* Chunked */ Chunk != nullptr) ||
        StaticChunked) {
      CGOpenMPRuntime::StaticRTInput StaticInit(
          IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
          LB.getAddress(), UB.getAddress(), ST.getAddress(),
          StaticChunked ? Chunk : nullptr);
      RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                  StaticInit);
      JumpDest LoopExit =
          getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
      // UB = min(UB, GlobalUB);
      EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound());
      // IV = LB;
      EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedInit()
                          : S.getInit());

      const Expr *Cond =
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
              ? S.getCombinedCond()
              : S.getCond();

      if (StaticChunked)
        Cond = S.getCombinedDistCond();

      // For static unchunked schedules generate:
      //
      //  1. For distribute alone, codegen
      //    while (idx <= UB) {
      //      BODY;
      //      ++idx;
      //    }
      //
      //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
      //    while (idx <= UB) {
      //      <CodeGen rest of pragma>(LB, UB);
      //      idx += ST;
      //    }
      //
      // For static chunk one schedule generate:
      //
      // while (IV <= GlobalUB) {
      //   <CodeGen rest of pragma>(LB, UB);
      //   LB += ST;
      //   UB += ST;
      //   UB = min(UB, GlobalUB);
      //   IV = LB;
      // }
      //
      emitCommonSimdLoop(
          *this, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
           StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), Cond, IncExpr,
                [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                  CodeGenLoop(CGF, S, LoopExit);
                },
                [&S, StaticChunked](CodeGenFunction &CGF) {
                  if (StaticChunked) {
                    CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                    CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                    CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                    CGF.EmitIgnoredExpr(S.getCombinedInit());
                  }
                });
          });
      EmitBlock(LoopExit.getBlock());
      // Tell the runtime we are done.
      RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
    } else {
      // Emit the outer loop, which requests its work chunk [LB..UB] from
      // runtime and runs the inner loop to process it.
      const OMPLoopArguments LoopArguments = {
          LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
          Chunk};
      EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                 CodeGenLoop);
    }
    if (isOpenMPSimdDirective(S.getDirectiveKind())) {
      EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
      });
    }
    if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
        !isOpenMPTeamsDirective(S.getDirectiveKind())) {
      EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
  }

  // We're now done with the loop, so jump to the continuation block.
  if (ContBlock) {
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}
void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
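
// Illustrative example (added for orientation; the variable names are made
// up): the lowering above handles a standalone distribute construct such as
//   #pragma omp distribute
//   for (int i = 0; i < N; ++i)
//     body(i);
// where the iteration space is split into chunks handed out to the teams of
// the enclosing league.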
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}
template <typename T>
static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
                          llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
                          llvm::OpenMPIRBuilder &OMPBuilder) {
  unsigned NumLoops = C->getNumLoops();
  QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
      /*DestWidth=*/64, /*Signed=*/1);
  llvm::SmallVector<llvm::Value *> StoreValues;
  for (unsigned I = 0; I < NumLoops; I++) {
    const Expr *CounterVal = C->getLoopData(I);
    llvm::Value *StoreValue = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    StoreValues.emplace_back(StoreValue);
  }
  OMPDoacrossKind<T> ODK;
  bool IsDependSource = ODK.isSource(C);
  CGF.Builder.restoreIP(
      OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
                                     StoreValues, ".cnt.addr", IsDependSource));
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with depend clause.
      assert(!S.hasAssociatedStmt() && "No associated statement must be in "
                                       "ordered depend|doacross construct.");
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
      };

      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(*CS, CapturedVars);
          llvm::Function *OutlinedFn =
              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
                                               OutlinedFn, CapturedVars);
        } else {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
        }
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      Builder.restoreIP(
          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
    }
    return;
  }

  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered doacross construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
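
// Illustrative example (not part of the original source): without a
// depend/doacross clause the directive behaves as if 'threads' were written,
// e.g.
//   #pragma omp ordered
//   { emit_in_iteration_order(i); }
// while '#pragma omp ordered simd' selects the OMPSIMDClause path above and
// outlines the associated statement.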
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}

static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}
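
// Illustrative example (not part of the original source): the helper above
// lowers
//   #pragma omp atomic read acquire
//   v = x;
// by loading 'x' atomically and storing the result into 'v' non-atomically,
// with an acquire flush emitted after the load for the stronger orderings.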
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
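
// Illustrative example (not part of the original source): the helper above
// lowers
//   #pragma omp atomic write release
//   x = expr;
// by evaluating 'expr' non-atomically and storing it into 'x' atomically,
// with a release flush emitted for the stronger orderings.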
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;
    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress().getElementType(), BO))
    return std::make_pair(false, RValue::get(nullptr));

  bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  default:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          IC, X.getAddress().getElementType(),
          X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
                                         X.getAddress().getElementType());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
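
// Illustrative example (not part of the original source): for an update such
// as
//   #pragma omp atomic
//   x += 1;
// with integer 'x', the helper above selects llvm::AtomicRMWInst::Add and the
// whole update collapses to a single 'atomicrmw add'; forms that cannot be
// expressed as a single RMW instruction fall back to the compare-and-swap path
// in EmitOMPAtomicSimpleUpdateExpr below.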
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval + expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr
  //  x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval + expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr
  //  x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
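
// Illustrative example (not part of the original source): among the accepted
// update forms listed above,
//   #pragma omp atomic update
//   x = expr * x;
// has 'x' on the right of the binary operator (IsXLHSInRHSPart == false), so
// the opaque values map 'expr' and the previous value of 'x' into the rewritten
// operator before the atomic update is emitted.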
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //  x binop= expr; -> xrval + expr;
    //  x++, ++x -> xrval + 1;
    //  x--, --x -> xrval - 1;
    //  x = x binop expr; -> xrval binop expr
    //  x = expr Op x; - > expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP 5.1 removes the required flush for capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry to
    // the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the atomic
    // operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}
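
// Illustrative example (not part of the original source): '#pragma omp atomic
// capture' covers both the postfix-update form
//   { v = x; x += expr; }   // IsPostfixUpdate == true, 'v' receives old 'x'
// and the prefix form
//   { x += expr; v = x; }   // 'v' receives the updated 'x'
// with the flush-on-entry/exit handling above applying only before OpenMP 5.1.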
static void emitOMPAtomicCompareExpr(
    CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
    const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
    const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
    SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
  switch (cast<BinaryOperator>(CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator");
  }

  LValue XLVal = CGF.EmitLValue(X);
  Address XAddr = XLVal.getAddress();

  auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
    if (X->getType() == E->getType())
      return CGF.EmitScalarExpr(E);
    const Expr *NewE = E->IgnoreImplicitAsWritten();
    llvm::Value *V = CGF.EmitScalarExpr(NewE);
    if (NewE->getType() == X->getType())
      return V;
    return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
  };

  llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
  llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
  if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
    EVal = CGF.Builder.CreateIntCast(
        CI, XLVal.getAddress().getElementType(),
        E->getType()->hasSignedIntegerRepresentation());
  if (DVal)
    if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
      DVal = CGF.Builder.CreateIntCast(
          CI, XLVal.getAddress().getElementType(),
          D->getType()->hasSignedIntegerRepresentation());

  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      XAddr.emitRawPointer(CGF), XAddr.getElementType(),
      X->getType()->hasSignedIntegerRepresentation(),
      X->getType().isVolatileQualified()};
  llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
  if (V) {
    LValue LV = CGF.EmitLValue(V);
    Address Addr = LV.getAddress();
    VOpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
              V->getType()->hasSignedIntegerRepresentation(),
              V->getType().isVolatileQualified()};
  }
  if (R) {
    LValue LV = CGF.EmitLValue(R);
    Address Addr = LV.getAddress();
    ROpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(),
              R->getType()->hasSignedIntegerRepresentation(),
              R->getType().isVolatileQualified()};
  }

  if (FailAO == llvm::AtomicOrdering::NotAtomic) {
    // fail clause was not mentioned on the
    // "#pragma omp atomic compare" construct.
    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly));
  } else {
    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
        IsPostfixUpdate, IsFailOnly, FailAO));
  }
}
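
// Illustrative example (not part of the original source): '#pragma omp atomic
// compare' accepts conditional updates such as
//   x = (x < e) ? e : x;     // min/max style update
//   if (x == e) { x = d; }   // equality style update
// which the switch above classifies by the comparison opcode of 'CE' before
// delegating to OMPBuilder.createAtomicCompare; the optional 'fail(...)'
// clause only changes the failure ordering of the generated compare-exchange.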
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO,
                              llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *R,
                              const Expr *E, const Expr *UE, const Expr *D,
                              const Expr *CE, bool IsXLHSInRHSPart,
                              bool IsFailOnly, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_compare: {
    emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
                             IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
    break;
  }
  default:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
  // Fail Memory Clause Ordering.
  llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
    // if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(K);
  }
  // We just need to correct Kind here. No need to set a bool saying it is
  // actually compare capture because we can tell from whether V and R are
  // defined.
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_capture))
    Kind = OMPC_compare;
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_fail)) {
    Kind = OMPC_compare;
    const auto *FailClause = S.getSingleClause<OMPFailClause>();
    if (FailClause) {
      OpenMPClauseKind FailParameter = FailClause->getFailParameter();
      if (FailParameter == llvm::omp::OMPC_relaxed)
        FailAO = llvm::AtomicOrdering::Monotonic;
      else if (FailParameter == llvm::omp::OMPC_acquire)
        FailAO = llvm::AtomicOrdering::Acquire;
      else if (FailParameter == llvm::omp::OMPC_seq_cst)
        FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
                    S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
                    S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
                    S.isFailOnly(), S.getBeginLoc());
}
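
// Illustrative example (not part of the original source): an explicit
// memory-order clause such as
//   #pragma omp atomic update seq_cst
//   x += 1;
// overrides the default ordering that getDefaultMemoryOrdering() derives from
// a 'requires atomic_default_mem_order(...)' declaration, if one is present.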
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for the at most one if clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
      nullptr, OMPC_DEVICE_unknown);
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error,
        "No offloading entry generated while offloading is mandatory.");
    CGM.getDiags().Report(DiagID);
  }

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}
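
// Illustrative example (not part of the original source): the clauses handled
// above come from directives such as
//   #pragma omp teams num_teams(8) thread_limit(128)
// where both expressions are forwarded to the runtime via emitNumTeamsClause
// before the outlined teams region is invoked.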
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {

  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());

  // Build list and emit dependences
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(NumDependences, DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
                                                S.getBeginLoc());
    DependenceList = DependenciesArray.emitRawPointer(*this);
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause clause is used separately in OMPInteropDirective.");

  auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
  if (!ItOMPInitClause.empty()) {
    // Look at the multiple init clauses
    for (const OMPInitClause *C : ItOMPInitClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      llvm::omp::OMPInteropType InteropType =
          llvm::omp::OMPInteropType::Unknown;
      if (C->getIsTarget()) {
        InteropType = llvm::omp::OMPInteropType::Target;
      } else {
        assert(C->getIsTargetSync() &&
               "Expected interop-type target/targetsync");
        InteropType = llvm::omp::OMPInteropType::TargetSync;
      }
      OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType,
                                      Device, NumDependences, DependenceList,
                                      Data.HasNowaitClause);
    }
  }
  auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
  if (!ItOMPDestroyClause.empty()) {
    // Look at the multiple destroy clauses
    for (const OMPDestroyClause *C : ItOMPDestroyClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
                                         NumDependences, DependenceList,
                                         Data.HasNowaitClause);
    }
  }
  auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
  if (!ItOMPUseClause.empty()) {
    // Look at the multiple use clauses
    for (const OMPUseClause *C : ItOMPUseClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
                                     NumDependences, DependenceList,
                                     Data.HasNowaitClause);
    }
  }
}
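
// Illustrative example (not part of the original source): the three clause
// loops above handle directives such as
//   #pragma omp interop init(targetsync: obj) device(1)
//   #pragma omp interop use(obj)
//   #pragma omp interop destroy(obj)
// each of which forwards the interop variable, device id and any dependences
// to the corresponding OMPIRBuilder entry point.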
static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}
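
// Illustrative example (not part of the original source): a conditional
// cancellation such as
//   #pragma omp cancel parallel if(err != 0)
// reaches either the OMPIRBuilder createCancel path (parallel/sections
// regions) or the runtime emitCancelCall fallback above, depending on whether
// the OpenMPIRBuilder is enabled.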
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *OrigVarIt : C.varlists()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
    if (!Processed.insert(OrigVD).second)
      continue;

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    // Return the address of the private variable.
    bool IsRegistered = PrivateScope.addPrivate(
        OrigVD,
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}
static const VarDecl *getBaseDecl(const Expr *Ref) {
  const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<ArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
}

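// 'use_device_addr' list items may be array sections or subscripts, so
// getBaseDecl above strips those off to find the underlying variable before
// the clause is privatized below.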
void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // an OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    Address PrivAddr =
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy));
    // For declrefs and variable length arrays we need to load the pointer to
    // get the correct mapping, since the pointer to the data was passed to the
    // runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType()) {
      QualType PtrTy = getContext().getPointerType(
          OrigVD->getType().getNonReferenceType());
      PrivAddr =
          EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
                            PtrTy->castAs<PointerType>());
    }

    (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
  }
}

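// An illustrative 'target data' region (not taken from this file):
//   #pragma omp target data map(tofrom: v[0:n]) use_device_ptr(v) if(n > 0)
//   { ... }
// The emission below wraps the region body in begin/end mapping calls and,
// when the runtime requests it, privatizes use_device_ptr/use_device_addr
// pointers via the clause emitters above.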
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = cast<DeclRefExpr>(E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = getBaseDecl(E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Even though the body of the region is emitted as an inlined directive,
    // we don't use an inline scope, because changes to references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

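// Generate the instructions for '#pragma omp target enter data' directive.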
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

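// Generate the instructions for '#pragma omp target exit data' directive.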
void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

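// Shared helper for 'target parallel': emits the 'parallel' region body with
// firstprivate/private/reduction handling. It is used both when emitting the
// device function and when emitting the host path below.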
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a taskloop helper variable to the corresponding outlined-function
/// parameter.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
}

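// Taskloop lowering: the loop is packaged as a task whose bounds
// (lower/upper/stride/last-iteration) arrive as parameters of the outlined
// function and are mapped onto the directive's helper variables via mapParam
// above. Illustrative source (not taken from this file):
//   #pragma omp taskloop grainsize(64)
//   for (int i = 0; i < n; ++i) body(i);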
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

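// The 'master taskloop' forms below wrap the taskloop emission above in a
// master region; the 'parallel master taskloop' forms additionally wrap that
// in a parallel region.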
void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

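// '#pragma omp loop' with no binding is currently emitted by simply inlining
// the associated loop statement; the combined parallel/teams/target 'loop'
// forms below reuse the existing worksharing and distribute codegen paths.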
void CodeGenFunction::EmitOMPGenericLoopDirective(
    const OMPGenericLoopDirective &S) {
  // Unimplemented, just inline the underlying statement for now.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the loop iteration variable.
    const Stmt *CS =
        cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(CS);
    if (ForS && !isa<DeclStmt>(ForS->getInit())) {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitStmt(CS);
      LoopScope.restoreMap();
    } else {
      CGF.EmitStmt(CS);
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
}

void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit combined directive as if its constituent constructs are 'parallel'
  // and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

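// For 'target teams loop', codegen picks between a 'distribute parallel for'
// lowering and a plain 'distribute' lowering based on S.canBeParallelFor().
// In asserts builds, the helper below reports which choice was made under the
// "target-teams-loop-codegen" debug type.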
#ifndef NDEBUG
static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
                                             std::string StatusMsg,
                                             const OMPExecutableDirective &D) {
  bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
  if (IsDevice)
    StatusMsg += ": DEVICE";
  else
    StatusMsg += ": HOST";
  SourceLocation L = D.getBeginLoc();
  auto &SM = CGF.getContext().getSourceManager();
  PresumedLoc PLoc = SM.getPresumedLoc(L);
  const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
  unsigned LineNo =
      PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
  llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
}
#endif

static void emitTargetTeamsGenericLoopRegionAsParallel(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent construct is 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
    const OMPTargetTeamsGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Emit SPMD target teams loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr &&
         "Target device function emission failed for 'target teams loop'.");
}

static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelGenericLoopDirective &S) {
  // Emit target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

/// Emit combined directive 'target parallel loop' as if its constituent
/// constructs are 'target', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
    const OMPTargetParallelGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

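// Emit only the "simple" part of an executable directive: scan directives are
// dispatched directly, simd directives are emitted as a simd region, and
// everything else just emits its associated/captured statement after capturing
// any global firstprivates and loop counters so they are not referenced
// unprivatized.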
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked ||
      D.getDirectiveKind() == OMPD_unroll) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}