1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This contains code to emit OpenMP nodes as LLVM code.
11 //===----------------------------------------------------------------------===//
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/BinaryFormat/Dwarf.h"
29 #include "llvm/Frontend/OpenMP/OMPConstants.h"
30 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DebugInfoMetadata.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Metadata.h"
36 #include "llvm/Support/AtomicOrdering.h"
38 using namespace clang
;
39 using namespace CodeGen
;
40 using namespace llvm::omp
;
// Forward declaration. getBaseDecl() is called further down (for the operands
// of use_device_addr clauses in OMPSimdLexicalScope); its definition is not
// part of this section of the file.
42 static const VarDecl
*getBaseDecl(const Expr
*Ref
);
45 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
46 /// for captured expressions.
47 class OMPLexicalScope
: public CodeGenFunction::LexicalScope
{
48 void emitPreInitStmt(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
49 for (const auto *C
: S
.clauses()) {
50 if (const auto *CPI
= OMPClauseWithPreInit::get(C
)) {
51 if (const auto *PreInit
=
52 cast_or_null
<DeclStmt
>(CPI
->getPreInitStmt())) {
53 for (const auto *I
: PreInit
->decls()) {
54 if (!I
->hasAttr
<OMPCaptureNoInitAttr
>()) {
55 CGF
.EmitVarDecl(cast
<VarDecl
>(*I
));
57 CodeGenFunction::AutoVarEmission Emission
=
58 CGF
.EmitAutoVarAlloca(cast
<VarDecl
>(*I
));
59 CGF
.EmitAutoVarCleanups(Emission
);
66 CodeGenFunction::OMPPrivateScope InlinedShareds
;
68 static bool isCapturedVar(CodeGenFunction
&CGF
, const VarDecl
*VD
) {
69 return CGF
.LambdaCaptureFields
.lookup(VD
) ||
70 (CGF
.CapturedStmtInfo
&& CGF
.CapturedStmtInfo
->lookup(VD
)) ||
71 (CGF
.CurCodeDecl
&& isa
<BlockDecl
>(CGF
.CurCodeDecl
) &&
72 cast
<BlockDecl
>(CGF
.CurCodeDecl
)->capturesVariable(VD
));
77 CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
,
78 const std::optional
<OpenMPDirectiveKind
> CapturedRegion
= std::nullopt
,
79 const bool EmitPreInitStmt
= true)
80 : CodeGenFunction::LexicalScope(CGF
, S
.getSourceRange()),
83 emitPreInitStmt(CGF
, S
);
86 assert(S
.hasAssociatedStmt() &&
87 "Expected associated statement for inlined directive.");
88 const CapturedStmt
*CS
= S
.getCapturedStmt(*CapturedRegion
);
89 for (const auto &C
: CS
->captures()) {
90 if (C
.capturesVariable() || C
.capturesVariableByCopy()) {
91 auto *VD
= C
.getCapturedVar();
92 assert(VD
== VD
->getCanonicalDecl() &&
93 "Canonical decl must be captured.");
95 CGF
.getContext(), const_cast<VarDecl
*>(VD
),
96 isCapturedVar(CGF
, VD
) || (CGF
.CapturedStmtInfo
&&
97 InlinedShareds
.isGlobalVarCaptured(VD
)),
98 VD
->getType().getNonReferenceType(), VK_LValue
, C
.getLocation());
99 InlinedShareds
.addPrivate(VD
, CGF
.EmitLValue(&DRE
).getAddress(CGF
));
102 (void)InlinedShareds
.Privatize();
106 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
107 /// for captured expressions.
108 class OMPParallelScope final
: public OMPLexicalScope
{
109 bool EmitPreInitStmt(const OMPExecutableDirective
&S
) {
110 OpenMPDirectiveKind Kind
= S
.getDirectiveKind();
111 return !(isOpenMPTargetExecutionDirective(Kind
) ||
112 isOpenMPLoopBoundSharingDirective(Kind
)) &&
113 isOpenMPParallelDirective(Kind
);
117 OMPParallelScope(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
)
118 : OMPLexicalScope(CGF
, S
, /*CapturedRegion=*/std::nullopt
,
119 EmitPreInitStmt(S
)) {}
122 /// Lexical scope for OpenMP teams construct, that handles correct codegen
123 /// for captured expressions.
124 class OMPTeamsScope final
: public OMPLexicalScope
{
125 bool EmitPreInitStmt(const OMPExecutableDirective
&S
) {
126 OpenMPDirectiveKind Kind
= S
.getDirectiveKind();
127 return !isOpenMPTargetExecutionDirective(Kind
) &&
128 isOpenMPTeamsDirective(Kind
);
132 OMPTeamsScope(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
)
133 : OMPLexicalScope(CGF
, S
, /*CapturedRegion=*/std::nullopt
,
134 EmitPreInitStmt(S
)) {}
137 /// Private scope for OpenMP loop-based directives, that supports capturing
138 /// of used expression from loop statement.
139 class OMPLoopScope
: public CodeGenFunction::RunCleanupsScope
{
140 void emitPreInitStmt(CodeGenFunction
&CGF
, const OMPLoopBasedDirective
&S
) {
141 const DeclStmt
*PreInits
;
142 CodeGenFunction::OMPMapVars PreCondVars
;
143 if (auto *LD
= dyn_cast
<OMPLoopDirective
>(&S
)) {
144 llvm::DenseSet
<const VarDecl
*> EmittedAsPrivate
;
145 for (const auto *E
: LD
->counters()) {
146 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
147 EmittedAsPrivate
.insert(VD
->getCanonicalDecl());
148 (void)PreCondVars
.setVarAddr(
149 CGF
, VD
, CGF
.CreateMemTemp(VD
->getType().getNonReferenceType()));
151 // Mark private vars as undefs.
152 for (const auto *C
: LD
->getClausesOfKind
<OMPPrivateClause
>()) {
153 for (const Expr
*IRef
: C
->varlists()) {
155 cast
<VarDecl
>(cast
<DeclRefExpr
>(IRef
)->getDecl());
156 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
157 QualType OrigVDTy
= OrigVD
->getType().getNonReferenceType();
158 (void)PreCondVars
.setVarAddr(
160 Address(llvm::UndefValue::get(CGF
.ConvertTypeForMem(
161 CGF
.getContext().getPointerType(OrigVDTy
))),
162 CGF
.ConvertTypeForMem(OrigVDTy
),
163 CGF
.getContext().getDeclAlign(OrigVD
)));
167 (void)PreCondVars
.apply(CGF
);
168 // Emit init, __range and __end variables for C++ range loops.
169 (void)OMPLoopBasedDirective::doForAllLoops(
170 LD
->getInnermostCapturedStmt()->getCapturedStmt(),
171 /*TryImperfectlyNestedLoops=*/true, LD
->getLoopsNumber(),
172 [&CGF
](unsigned Cnt
, const Stmt
*CurStmt
) {
173 if (const auto *CXXFor
= dyn_cast
<CXXForRangeStmt
>(CurStmt
)) {
174 if (const Stmt
*Init
= CXXFor
->getInit())
176 CGF
.EmitStmt(CXXFor
->getRangeStmt());
177 CGF
.EmitStmt(CXXFor
->getEndStmt());
181 PreInits
= cast_or_null
<DeclStmt
>(LD
->getPreInits());
182 } else if (const auto *Tile
= dyn_cast
<OMPTileDirective
>(&S
)) {
183 PreInits
= cast_or_null
<DeclStmt
>(Tile
->getPreInits());
184 } else if (const auto *Unroll
= dyn_cast
<OMPUnrollDirective
>(&S
)) {
185 PreInits
= cast_or_null
<DeclStmt
>(Unroll
->getPreInits());
187 llvm_unreachable("Unknown loop-based directive kind.");
190 for (const auto *I
: PreInits
->decls())
191 CGF
.EmitVarDecl(cast
<VarDecl
>(*I
));
193 PreCondVars
.restore(CGF
);
197 OMPLoopScope(CodeGenFunction
&CGF
, const OMPLoopBasedDirective
&S
)
198 : CodeGenFunction::RunCleanupsScope(CGF
) {
199 emitPreInitStmt(CGF
, S
);
203 class OMPSimdLexicalScope
: public CodeGenFunction::LexicalScope
{
204 CodeGenFunction::OMPPrivateScope InlinedShareds
;
206 static bool isCapturedVar(CodeGenFunction
&CGF
, const VarDecl
*VD
) {
207 return CGF
.LambdaCaptureFields
.lookup(VD
) ||
208 (CGF
.CapturedStmtInfo
&& CGF
.CapturedStmtInfo
->lookup(VD
)) ||
209 (CGF
.CurCodeDecl
&& isa
<BlockDecl
>(CGF
.CurCodeDecl
) &&
210 cast
<BlockDecl
>(CGF
.CurCodeDecl
)->capturesVariable(VD
));
214 OMPSimdLexicalScope(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
)
215 : CodeGenFunction::LexicalScope(CGF
, S
.getSourceRange()),
216 InlinedShareds(CGF
) {
217 for (const auto *C
: S
.clauses()) {
218 if (const auto *CPI
= OMPClauseWithPreInit::get(C
)) {
219 if (const auto *PreInit
=
220 cast_or_null
<DeclStmt
>(CPI
->getPreInitStmt())) {
221 for (const auto *I
: PreInit
->decls()) {
222 if (!I
->hasAttr
<OMPCaptureNoInitAttr
>()) {
223 CGF
.EmitVarDecl(cast
<VarDecl
>(*I
));
225 CodeGenFunction::AutoVarEmission Emission
=
226 CGF
.EmitAutoVarAlloca(cast
<VarDecl
>(*I
));
227 CGF
.EmitAutoVarCleanups(Emission
);
231 } else if (const auto *UDP
= dyn_cast
<OMPUseDevicePtrClause
>(C
)) {
232 for (const Expr
*E
: UDP
->varlists()) {
233 const Decl
*D
= cast
<DeclRefExpr
>(E
)->getDecl();
234 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(D
))
235 CGF
.EmitVarDecl(*OED
);
237 } else if (const auto *UDP
= dyn_cast
<OMPUseDeviceAddrClause
>(C
)) {
238 for (const Expr
*E
: UDP
->varlists()) {
239 const Decl
*D
= getBaseDecl(E
);
240 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(D
))
241 CGF
.EmitVarDecl(*OED
);
245 if (!isOpenMPSimdDirective(S
.getDirectiveKind()))
246 CGF
.EmitOMPPrivateClause(S
, InlinedShareds
);
247 if (const auto *TG
= dyn_cast
<OMPTaskgroupDirective
>(&S
)) {
248 if (const Expr
*E
= TG
->getReductionRef())
249 CGF
.EmitVarDecl(*cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl()));
251 // Temp copy arrays for inscan reductions should not be emitted as they are
252 // not used in simd only mode.
253 llvm::DenseSet
<CanonicalDeclPtr
<const Decl
>> CopyArrayTemps
;
254 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
255 if (C
->getModifier() != OMPC_REDUCTION_inscan
)
257 for (const Expr
*E
: C
->copy_array_temps())
258 CopyArrayTemps
.insert(cast
<DeclRefExpr
>(E
)->getDecl());
260 const auto *CS
= cast_or_null
<CapturedStmt
>(S
.getAssociatedStmt());
262 for (auto &C
: CS
->captures()) {
263 if (C
.capturesVariable() || C
.capturesVariableByCopy()) {
264 auto *VD
= C
.getCapturedVar();
265 if (CopyArrayTemps
.contains(VD
))
267 assert(VD
== VD
->getCanonicalDecl() &&
268 "Canonical decl must be captured.");
269 DeclRefExpr
DRE(CGF
.getContext(), const_cast<VarDecl
*>(VD
),
270 isCapturedVar(CGF
, VD
) ||
271 (CGF
.CapturedStmtInfo
&&
272 InlinedShareds
.isGlobalVarCaptured(VD
)),
273 VD
->getType().getNonReferenceType(), VK_LValue
,
275 InlinedShareds
.addPrivate(VD
, CGF
.EmitLValue(&DRE
).getAddress(CGF
));
278 CS
= dyn_cast
<CapturedStmt
>(CS
->getCapturedStmt());
280 (void)InlinedShareds
.Privatize();
// Forward declaration. Takes the function being emitted, the directive and a
// RegionCodeGenTy callback; it is neither defined nor called in this section
// of the file.
286 static void emitCommonOMPTargetDirective(CodeGenFunction
&CGF
,
287 const OMPExecutableDirective
&S
,
288 const RegionCodeGenTy
&CodeGen
);
290 LValue
CodeGenFunction::EmitOMPSharedLValue(const Expr
*E
) {
291 if (const auto *OrigDRE
= dyn_cast
<DeclRefExpr
>(E
)) {
292 if (const auto *OrigVD
= dyn_cast
<VarDecl
>(OrigDRE
->getDecl())) {
293 OrigVD
= OrigVD
->getCanonicalDecl();
295 LambdaCaptureFields
.lookup(OrigVD
) ||
296 (CapturedStmtInfo
&& CapturedStmtInfo
->lookup(OrigVD
)) ||
297 (CurCodeDecl
&& isa
<BlockDecl
>(CurCodeDecl
));
298 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
), IsCaptured
,
299 OrigDRE
->getType(), VK_LValue
, OrigDRE
->getExprLoc());
300 return EmitLValue(&DRE
);
303 return EmitLValue(E
);
306 llvm::Value
*CodeGenFunction::getTypeSize(QualType Ty
) {
307 ASTContext
&C
= getContext();
308 llvm::Value
*Size
= nullptr;
309 auto SizeInChars
= C
.getTypeSizeInChars(Ty
);
310 if (SizeInChars
.isZero()) {
311 // getTypeSizeInChars() returns 0 for a VLA.
312 while (const VariableArrayType
*VAT
= C
.getAsVariableArrayType(Ty
)) {
313 VlaSizePair VlaSize
= getVLASize(VAT
);
316 Size
? Builder
.CreateNUWMul(Size
, VlaSize
.NumElts
) : VlaSize
.NumElts
;
318 SizeInChars
= C
.getTypeSizeInChars(Ty
);
319 if (SizeInChars
.isZero())
320 return llvm::ConstantInt::get(SizeTy
, /*V=*/0);
321 return Builder
.CreateNUWMul(Size
, CGM
.getSize(SizeInChars
));
323 return CGM
.getSize(SizeInChars
);
326 void CodeGenFunction::GenerateOpenMPCapturedVars(
327 const CapturedStmt
&S
, SmallVectorImpl
<llvm::Value
*> &CapturedVars
) {
328 const RecordDecl
*RD
= S
.getCapturedRecordDecl();
329 auto CurField
= RD
->field_begin();
330 auto CurCap
= S
.captures().begin();
331 for (CapturedStmt::const_capture_init_iterator I
= S
.capture_init_begin(),
332 E
= S
.capture_init_end();
333 I
!= E
; ++I
, ++CurField
, ++CurCap
) {
334 if (CurField
->hasCapturedVLAType()) {
335 const VariableArrayType
*VAT
= CurField
->getCapturedVLAType();
336 llvm::Value
*Val
= VLASizeMap
[VAT
->getSizeExpr()];
337 CapturedVars
.push_back(Val
);
338 } else if (CurCap
->capturesThis()) {
339 CapturedVars
.push_back(CXXThisValue
);
340 } else if (CurCap
->capturesVariableByCopy()) {
341 llvm::Value
*CV
= EmitLoadOfScalar(EmitLValue(*I
), CurCap
->getLocation());
343 // If the field is not a pointer, we need to save the actual value
344 // and load it as a void pointer.
345 if (!CurField
->getType()->isAnyPointerType()) {
346 ASTContext
&Ctx
= getContext();
347 Address DstAddr
= CreateMemTemp(
348 Ctx
.getUIntPtrType(),
349 Twine(CurCap
->getCapturedVar()->getName(), ".casted"));
350 LValue DstLV
= MakeAddrLValue(DstAddr
, Ctx
.getUIntPtrType());
352 llvm::Value
*SrcAddrVal
= EmitScalarConversion(
353 DstAddr
.getPointer(), Ctx
.getPointerType(Ctx
.getUIntPtrType()),
354 Ctx
.getPointerType(CurField
->getType()), CurCap
->getLocation());
356 MakeNaturalAlignAddrLValue(SrcAddrVal
, CurField
->getType());
358 // Store the value using the source type pointer.
359 EmitStoreThroughLValue(RValue::get(CV
), SrcLV
);
361 // Load the value using the destination type pointer.
362 CV
= EmitLoadOfScalar(DstLV
, CurCap
->getLocation());
364 CapturedVars
.push_back(CV
);
366 assert(CurCap
->capturesVariable() && "Expected capture by reference.");
367 CapturedVars
.push_back(EmitLValue(*I
).getAddress(*this).getPointer());
372 static Address
castValueFromUintptr(CodeGenFunction
&CGF
, SourceLocation Loc
,
373 QualType DstType
, StringRef Name
,
375 ASTContext
&Ctx
= CGF
.getContext();
377 llvm::Value
*CastedPtr
= CGF
.EmitScalarConversion(
378 AddrLV
.getAddress(CGF
).getPointer(), Ctx
.getUIntPtrType(),
379 Ctx
.getPointerType(DstType
), Loc
);
381 CGF
.MakeNaturalAlignAddrLValue(CastedPtr
, DstType
).getAddress(CGF
);
385 static QualType
getCanonicalParamType(ASTContext
&C
, QualType T
) {
386 if (T
->isLValueReferenceType())
387 return C
.getLValueReferenceType(
388 getCanonicalParamType(C
, T
.getNonReferenceType()),
389 /*SpelledAsLValue=*/false);
390 if (T
->isPointerType())
391 return C
.getPointerType(getCanonicalParamType(C
, T
->getPointeeType()));
392 if (const ArrayType
*A
= T
->getAsArrayTypeUnsafe()) {
393 if (const auto *VLA
= dyn_cast
<VariableArrayType
>(A
))
394 return getCanonicalParamType(C
, VLA
->getElementType());
395 if (!A
->isVariablyModifiedType())
396 return C
.getCanonicalType(T
);
398 return C
.getCanonicalParamType(T
);
402 /// Contains required data for proper outlined function codegen.
403 struct FunctionOptions
{
404 /// Captured statement for which the function is generated.
405 const CapturedStmt
*S
= nullptr;
406 /// true if cast to/from UIntPtr is required for variables captured by
408 const bool UIntPtrCastRequired
= true;
409 /// true if only casted arguments must be registered as local args or VLA
411 const bool RegisterCastedArgsOnly
= false;
412 /// Name of the generated function.
413 const StringRef FunctionName
;
414 /// Location of the non-debug version of the outlined function.
416 explicit FunctionOptions(const CapturedStmt
*S
, bool UIntPtrCastRequired
,
417 bool RegisterCastedArgsOnly
, StringRef FunctionName
,
419 : S(S
), UIntPtrCastRequired(UIntPtrCastRequired
),
420 RegisterCastedArgsOnly(UIntPtrCastRequired
&& RegisterCastedArgsOnly
),
421 FunctionName(FunctionName
), Loc(Loc
) {}
/// Builds the parameter lists for the outlined function described by \p FO,
/// creates the llvm::Function and starts its emission in \p CGF.
///
/// \p Args receives the parameter declarations: the captured declaration's
/// own parameters around the context parameter position plus one argument per
/// field of the captured record. A local TargetArgs list, which is what the
/// function type is arranged from and what StartFunction() receives, holds
/// either the same declaration or the result of translateParameter(),
/// depending on FO.UIntPtrCastRequired.
/// The function is created with internal linkage under FO.FunctionName.
/// After StartFunction(), every captured field is processed: variable
/// captures are recorded in \p LocalAddrs, VLA size captures in \p VLASizes,
/// and a captured 'this' is loaded into \p CXXThisValue.
/// NOTE(review): the return statement is not visible here; presumably the
/// created llvm::Function is returned - confirm.
425 static llvm::Function
*emitOutlinedFunctionPrologue(
426 CodeGenFunction
&CGF
, FunctionArgList
&Args
,
427 llvm::MapVector
<const Decl
*, std::pair
<const VarDecl
*, Address
>>
429 llvm::DenseMap
<const Decl
*, std::pair
<const Expr
*, llvm::Value
*>>
431 llvm::Value
*&CXXThisValue
, const FunctionOptions
&FO
) {
432 const CapturedDecl
*CD
= FO
.S
->getCapturedDecl();
433 const RecordDecl
*RD
= FO
.S
->getCapturedRecordDecl();
434 assert(CD
->hasBody() && "missing CapturedDecl body");
436 CXXThisValue
= nullptr;
437 // Build the argument list.
438 CodeGenModule
&CGM
= CGF
.CGM
;
439 ASTContext
&Ctx
= CGM
.getContext();
440 FunctionArgList TargetArgs
;
441 Args
.append(CD
->param_begin(),
442 std::next(CD
->param_begin(), CD
->getContextParamPosition()));
445 std::next(CD
->param_begin(), CD
->getContextParamPosition()));
446 auto I
= FO
.S
->captures().begin();
447 FunctionDecl
*DebugFunctionDecl
= nullptr;
// Without the UIntPtr cast a static, void, parameterless FunctionDecl is
// created; it is used below as the declaration context of ParmVarDecl
// arguments.
448 if (!FO
.UIntPtrCastRequired
) {
449 FunctionProtoType::ExtProtoInfo EPI
;
450 QualType FunctionTy
= Ctx
.getFunctionType(Ctx
.VoidTy
, std::nullopt
, EPI
);
451 DebugFunctionDecl
= FunctionDecl::Create(
452 Ctx
, Ctx
.getTranslationUnitDecl(), FO
.S
->getBeginLoc(),
453 SourceLocation(), DeclarationName(), FunctionTy
,
454 Ctx
.getTrivialTypeSourceInfo(FunctionTy
), SC_Static
,
455 /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
456 /*hasWrittenPrototype=*/false);
// One argument is created for every field of the captured record.
458 for (const FieldDecl
*FD
: RD
->fields()) {
459 QualType ArgType
= FD
->getType();
460 IdentifierInfo
*II
= nullptr;
461 VarDecl
*CapVar
= nullptr;
463 // If this is a capture by copy and the type is not a pointer, the outlined
464 // function argument type should be uintptr and the value properly casted to
465 // uintptr. This is necessary given that the runtime library is only able to
466 // deal with pointers. We can pass in the same way the VLA type sizes to the
467 // outlined function.
468 if (FO
.UIntPtrCastRequired
&&
469 ((I
->capturesVariableByCopy() && !ArgType
->isAnyPointerType()) ||
470 I
->capturesVariableArrayType()))
471 ArgType
= Ctx
.getUIntPtrType();
// The argument is named after the captured variable, or "this" / "vla".
473 if (I
->capturesVariable() || I
->capturesVariableByCopy()) {
474 CapVar
= I
->getCapturedVar();
475 II
= CapVar
->getIdentifier();
476 } else if (I
->capturesThis()) {
477 II
= &Ctx
.Idents
.get("this");
479 assert(I
->capturesVariableArrayType());
480 II
= &Ctx
.Idents
.get("vla");
482 if (ArgType
->isVariablyModifiedType())
483 ArgType
= getCanonicalParamType(Ctx
, ArgType
);
// Thread-local captured variables get a ThreadPrivateVar implicit parameter.
485 if (CapVar
&& (CapVar
->getTLSKind() != clang::VarDecl::TLS_None
)) {
486 Arg
= ImplicitParamDecl::Create(Ctx
, /*DC=*/nullptr, FD
->getLocation(),
488 ImplicitParamDecl::ThreadPrivateVar
);
489 } else if (DebugFunctionDecl
&& (CapVar
|| I
->capturesThis())) {
490 Arg
= ParmVarDecl::Create(
491 Ctx
, DebugFunctionDecl
,
492 CapVar
? CapVar
->getBeginLoc() : FD
->getBeginLoc(),
493 CapVar
? CapVar
->getLocation() : FD
->getLocation(), II
, ArgType
,
494 /*TInfo=*/nullptr, SC_None
, /*DefArg=*/nullptr);
496 Arg
= ImplicitParamDecl::Create(Ctx
, /*DC=*/nullptr, FD
->getLocation(),
497 II
, ArgType
, ImplicitParamDecl::Other
);
499 Args
.emplace_back(Arg
);
500 // Do not cast arguments if we emit function with non-original types.
501 TargetArgs
.emplace_back(
502 FO
.UIntPtrCastRequired
504 : CGM
.getOpenMPRuntime().translateParameter(FD
, Arg
));
507 Args
.append(std::next(CD
->param_begin(), CD
->getContextParamPosition() + 1),
510 std::next(CD
->param_begin(), CD
->getContextParamPosition() + 1),
513 // Create the function declaration.
514 const CGFunctionInfo
&FuncInfo
=
515 CGM
.getTypes().arrangeBuiltinFunctionDeclaration(Ctx
.VoidTy
, TargetArgs
);
516 llvm::FunctionType
*FuncLLVMTy
= CGM
.getTypes().GetFunctionType(FuncInfo
);
519 llvm::Function::Create(FuncLLVMTy
, llvm::GlobalValue::InternalLinkage
,
520 FO
.FunctionName
, &CGM
.getModule());
521 CGM
.SetInternalFunctionAttributes(CD
, F
, FuncInfo
);
523 F
->setDoesNotThrow();
524 F
->setDoesNotRecurse();
526 // Always inline the outlined function if optimizations are enabled.
527 if (CGM
.getCodeGenOpts().OptimizationLevel
!= 0) {
528 F
->removeFnAttr(llvm::Attribute::NoInline
);
529 F
->addFnAttr(llvm::Attribute::AlwaysInline
);
532 // Generate the function.
533 CGF
.StartFunction(CD
, Ctx
.VoidTy
, F
, FuncInfo
, TargetArgs
,
534 FO
.UIntPtrCastRequired
? FO
.Loc
: FO
.S
->getBeginLoc(),
535 FO
.UIntPtrCastRequired
? FO
.Loc
536 : CD
->getBody()->getBeginLoc());
// Second pass over the fields: Cnt indexes Args/TargetArgs starting at the
// context parameter position, I walks the captures again from the beginning.
537 unsigned Cnt
= CD
->getContextParamPosition();
538 I
= FO
.S
->captures().begin();
539 for (const FieldDecl
*FD
: RD
->fields()) {
540 // Do not map arguments if we emit function with non-original types.
541 Address
LocalAddr(Address::invalid());
542 if (!FO
.UIntPtrCastRequired
&& Args
[Cnt
] != TargetArgs
[Cnt
]) {
543 LocalAddr
= CGM
.getOpenMPRuntime().getParameterAddress(CGF
, Args
[Cnt
],
546 LocalAddr
= CGF
.GetAddrOfLocalVar(Args
[Cnt
]);
548 // If we are capturing a pointer by copy we don't need to do anything, just
549 // use the value that we get from the arguments.
550 if (I
->capturesVariableByCopy() && FD
->getType()->isAnyPointerType()) {
551 const VarDecl
*CurVD
= I
->getCapturedVar();
552 if (!FO
.RegisterCastedArgsOnly
)
553 LocalAddrs
.insert({Args
[Cnt
], {CurVD
, LocalAddr
}});
559 LValue ArgLVal
= CGF
.MakeAddrLValue(LocalAddr
, Args
[Cnt
]->getType(),
560 AlignmentSource::Decl
);
// Captured VLA size: load it (through the uintptr cast when required) and
// record it together with its size expression.
561 if (FD
->hasCapturedVLAType()) {
562 if (FO
.UIntPtrCastRequired
) {
563 ArgLVal
= CGF
.MakeAddrLValue(
564 castValueFromUintptr(CGF
, I
->getLocation(), FD
->getType(),
565 Args
[Cnt
]->getName(), ArgLVal
),
566 FD
->getType(), AlignmentSource::Decl
);
568 llvm::Value
*ExprArg
= CGF
.EmitLoadOfScalar(ArgLVal
, I
->getLocation());
569 const VariableArrayType
*VAT
= FD
->getCapturedVLAType();
570 VLASizes
.try_emplace(Args
[Cnt
], VAT
->getSizeExpr(), ExprArg
);
571 } else if (I
->capturesVariable()) {
572 const VarDecl
*Var
= I
->getCapturedVar();
573 QualType VarTy
= Var
->getType();
574 Address ArgAddr
= ArgLVal
.getAddress(CGF
);
575 if (ArgLVal
.getType()->isLValueReferenceType()) {
576 ArgAddr
= CGF
.EmitLoadOfReference(ArgLVal
);
577 } else if (!VarTy
->isVariablyModifiedType() || !VarTy
->isPointerType()) {
578 assert(ArgLVal
.getType()->isPointerType());
579 ArgAddr
= CGF
.EmitLoadOfPointer(
580 ArgAddr
, ArgLVal
.getType()->castAs
<PointerType
>());
582 if (!FO
.RegisterCastedArgsOnly
) {
584 {Args
[Cnt
], {Var
, ArgAddr
.withAlignment(Ctx
.getDeclAlign(Var
))}});
586 } else if (I
->capturesVariableByCopy()) {
587 assert(!FD
->getType()->isAnyPointerType() &&
588 "Not expecting a captured pointer.");
589 const VarDecl
*Var
= I
->getCapturedVar();
590 LocalAddrs
.insert({Args
[Cnt
],
591 {Var
, FO
.UIntPtrCastRequired
592 ? castValueFromUintptr(
593 CGF
, I
->getLocation(), FD
->getType(),
594 Args
[Cnt
]->getName(), ArgLVal
)
595 : ArgLVal
.getAddress(CGF
)}});
597 // If 'this' is captured, load it into CXXThisValue.
598 assert(I
->capturesThis());
599 CXXThisValue
= CGF
.EmitLoadOfScalar(ArgLVal
, I
->getLocation());
600 LocalAddrs
.insert({Args
[Cnt
], {nullptr, ArgLVal
.getAddress(CGF
)}});
/// Emits the outlined function for the captured statement \p S.
///
/// The body of the captured declaration is emitted into a function set up by
/// emitOutlinedFunctionPrologue(); the local addresses and VLA sizes it
/// reports are installed before the body is emitted.
/// When NeedWrapperFunction is set (debug info is being generated and
/// hasReducedDebugInfo() holds), a second function is emitted through a
/// separate CodeGenFunction: it builds one call argument per parameter and
/// forwards them to the first function via emitOutlinedFunctionCall().
/// \param Loc Location passed on to FunctionOptions and to the forwarding
///            call.
610 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt
&S
,
611 SourceLocation Loc
) {
614 "CapturedStmtInfo should be set when generating the captured function");
615 const CapturedDecl
*CD
= S
.getCapturedDecl();
616 // Build the argument list.
617 bool NeedWrapperFunction
=
618 getDebugInfo() && CGM
.getCodeGenOpts().hasReducedDebugInfo();
619 FunctionArgList Args
;
620 llvm::MapVector
<const Decl
*, std::pair
<const VarDecl
*, Address
>> LocalAddrs
;
621 llvm::DenseMap
<const Decl
*, std::pair
<const Expr
*, llvm::Value
*>> VLASizes
;
// The function name is assembled in Buffer, starting from the helper name.
622 SmallString
<256> Buffer
;
623 llvm::raw_svector_ostream
Out(Buffer
);
624 Out
<< CapturedStmtInfo
->getHelperName();
625 if (NeedWrapperFunction
)
627 FunctionOptions
FO(&S
, !NeedWrapperFunction
, /*RegisterCastedArgsOnly=*/false,
629 llvm::Function
*F
= emitOutlinedFunctionPrologue(*this, Args
, LocalAddrs
,
630 VLASizes
, CXXThisValue
, FO
);
// Install the addresses of the captured variables; entries without a variable
// (first == nullptr) are skipped.
631 CodeGenFunction::OMPPrivateScope
LocalScope(*this);
632 for (const auto &LocalAddrPair
: LocalAddrs
) {
633 if (LocalAddrPair
.second
.first
) {
634 LocalScope
.addPrivate(LocalAddrPair
.second
.first
,
635 LocalAddrPair
.second
.second
);
638 (void)LocalScope
.Privatize();
639 for (const auto &VLASizePair
: VLASizes
)
640 VLASizeMap
[VLASizePair
.second
.first
] = VLASizePair
.second
.second
;
641 PGO
.assignRegionCounters(GlobalDecl(CD
), F
);
642 CapturedStmtInfo
->EmitBody(*this, CD
->getBody());
643 (void)LocalScope
.ForceCleanup();
644 FinishFunction(CD
->getBodyRBrace());
645 if (!NeedWrapperFunction
)
// From here on the second function is built; it uses the plain helper name,
// requires the UIntPtr cast and registers casted arguments only.
648 FunctionOptions
WrapperFO(&S
, /*UIntPtrCastRequired=*/true,
649 /*RegisterCastedArgsOnly=*/true,
650 CapturedStmtInfo
->getHelperName(), Loc
);
651 CodeGenFunction
WrapperCGF(CGM
, /*suppressNewContext=*/true);
652 WrapperCGF
.CapturedStmtInfo
= CapturedStmtInfo
;
656 llvm::Function
*WrapperF
=
657 emitOutlinedFunctionPrologue(WrapperCGF
, Args
, LocalAddrs
, VLASizes
,
658 WrapperCGF
.CXXThisValue
, WrapperFO
);
// Build the call arguments; PI points into the parameters of F.
659 llvm::SmallVector
<llvm::Value
*, 4> CallArgs
;
660 auto *PI
= F
->arg_begin();
661 for (const auto *Arg
: Args
) {
662 llvm::Value
*CallArg
;
663 auto I
= LocalAddrs
.find(Arg
);
664 if (I
!= LocalAddrs
.end()) {
665 LValue LV
= WrapperCGF
.MakeAddrLValue(
667 I
->second
.first
? I
->second
.first
->getType() : Arg
->getType(),
668 AlignmentSource::Decl
);
669 if (LV
.getType()->isAnyComplexType())
670 LV
.setAddress(WrapperCGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
671 LV
.getAddress(WrapperCGF
),
672 PI
->getType()->getPointerTo(
673 LV
.getAddress(WrapperCGF
).getAddressSpace()),
675 CallArg
= WrapperCGF
.EmitLoadOfScalar(LV
, S
.getBeginLoc());
// Not a registered local address: use the recorded VLA size if there is one,
// otherwise load the argument from its local variable.
677 auto EI
= VLASizes
.find(Arg
);
678 if (EI
!= VLASizes
.end()) {
679 CallArg
= EI
->second
.second
;
682 WrapperCGF
.MakeAddrLValue(WrapperCGF
.GetAddrOfLocalVar(Arg
),
683 Arg
->getType(), AlignmentSource::Decl
);
684 CallArg
= WrapperCGF
.EmitLoadOfScalar(LV
, S
.getBeginLoc());
687 CallArgs
.emplace_back(WrapperCGF
.EmitFromMemory(CallArg
, Arg
->getType()));
690 CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF
, Loc
, F
, CallArgs
);
691 WrapperCGF
.FinishFunction();
695 //===----------------------------------------------------------------------===//
696 // OpenMP Directive Emission
697 //===----------------------------------------------------------------------===//
698 void CodeGenFunction::EmitOMPAggregateAssign(
699 Address DestAddr
, Address SrcAddr
, QualType OriginalType
,
700 const llvm::function_ref
<void(Address
, Address
)> CopyGen
) {
701 // Perform element-by-element initialization.
704 // Drill down to the base element type on both arrays.
705 const ArrayType
*ArrayTy
= OriginalType
->getAsArrayTypeUnsafe();
706 llvm::Value
*NumElements
= emitArrayLength(ArrayTy
, ElementTy
, DestAddr
);
707 SrcAddr
= Builder
.CreateElementBitCast(SrcAddr
, DestAddr
.getElementType());
709 llvm::Value
*SrcBegin
= SrcAddr
.getPointer();
710 llvm::Value
*DestBegin
= DestAddr
.getPointer();
711 // Cast from pointer to array type to pointer to single element.
712 llvm::Value
*DestEnd
= Builder
.CreateInBoundsGEP(DestAddr
.getElementType(),
713 DestBegin
, NumElements
);
715 // The basic structure here is a while-do loop.
716 llvm::BasicBlock
*BodyBB
= createBasicBlock("omp.arraycpy.body");
717 llvm::BasicBlock
*DoneBB
= createBasicBlock("omp.arraycpy.done");
718 llvm::Value
*IsEmpty
=
719 Builder
.CreateICmpEQ(DestBegin
, DestEnd
, "omp.arraycpy.isempty");
720 Builder
.CreateCondBr(IsEmpty
, DoneBB
, BodyBB
);
722 // Enter the loop body, making that address the current address.
723 llvm::BasicBlock
*EntryBB
= Builder
.GetInsertBlock();
726 CharUnits ElementSize
= getContext().getTypeSizeInChars(ElementTy
);
728 llvm::PHINode
*SrcElementPHI
=
729 Builder
.CreatePHI(SrcBegin
->getType(), 2, "omp.arraycpy.srcElementPast");
730 SrcElementPHI
->addIncoming(SrcBegin
, EntryBB
);
731 Address SrcElementCurrent
=
732 Address(SrcElementPHI
, SrcAddr
.getElementType(),
733 SrcAddr
.getAlignment().alignmentOfArrayElement(ElementSize
));
735 llvm::PHINode
*DestElementPHI
= Builder
.CreatePHI(
736 DestBegin
->getType(), 2, "omp.arraycpy.destElementPast");
737 DestElementPHI
->addIncoming(DestBegin
, EntryBB
);
738 Address DestElementCurrent
=
739 Address(DestElementPHI
, DestAddr
.getElementType(),
740 DestAddr
.getAlignment().alignmentOfArrayElement(ElementSize
));
743 CopyGen(DestElementCurrent
, SrcElementCurrent
);
745 // Shift the address forward by one element.
746 llvm::Value
*DestElementNext
=
747 Builder
.CreateConstGEP1_32(DestAddr
.getElementType(), DestElementPHI
,
748 /*Idx0=*/1, "omp.arraycpy.dest.element");
749 llvm::Value
*SrcElementNext
=
750 Builder
.CreateConstGEP1_32(SrcAddr
.getElementType(), SrcElementPHI
,
751 /*Idx0=*/1, "omp.arraycpy.src.element");
752 // Check whether we've reached the end.
754 Builder
.CreateICmpEQ(DestElementNext
, DestEnd
, "omp.arraycpy.done");
755 Builder
.CreateCondBr(Done
, DoneBB
, BodyBB
);
756 DestElementPHI
->addIncoming(DestElementNext
, Builder
.GetInsertBlock());
757 SrcElementPHI
->addIncoming(SrcElementNext
, Builder
.GetInsertBlock());
760 EmitBlock(DoneBB
, /*IsFinished=*/true);
763 void CodeGenFunction::EmitOMPCopy(QualType OriginalType
, Address DestAddr
,
764 Address SrcAddr
, const VarDecl
*DestVD
,
765 const VarDecl
*SrcVD
, const Expr
*Copy
) {
766 if (OriginalType
->isArrayType()) {
767 const auto *BO
= dyn_cast
<BinaryOperator
>(Copy
);
768 if (BO
&& BO
->getOpcode() == BO_Assign
) {
769 // Perform simple memcpy for simple copying.
770 LValue Dest
= MakeAddrLValue(DestAddr
, OriginalType
);
771 LValue Src
= MakeAddrLValue(SrcAddr
, OriginalType
);
772 EmitAggregateAssign(Dest
, Src
, OriginalType
);
774 // For arrays with complex element types perform element by element
776 EmitOMPAggregateAssign(
777 DestAddr
, SrcAddr
, OriginalType
,
778 [this, Copy
, SrcVD
, DestVD
](Address DestElement
, Address SrcElement
) {
779 // Working with the single array element, so have to remap
780 // destination and source variables to corresponding array
782 CodeGenFunction::OMPPrivateScope
Remap(*this);
783 Remap
.addPrivate(DestVD
, DestElement
);
784 Remap
.addPrivate(SrcVD
, SrcElement
);
785 (void)Remap
.Privatize();
786 EmitIgnoredExpr(Copy
);
790 // Remap pseudo source variable to private copy.
791 CodeGenFunction::OMPPrivateScope
Remap(*this);
792 Remap
.addPrivate(SrcVD
, SrcAddr
);
793 Remap
.addPrivate(DestVD
, DestAddr
);
794 (void)Remap
.Privatize();
795 // Emit copying of the whole variable.
796 EmitIgnoredExpr(Copy
);
800 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective
&D
,
801 OMPPrivateScope
&PrivateScope
) {
802 if (!HaveInsertPoint())
804 bool DeviceConstTarget
=
805 getLangOpts().OpenMPIsDevice
&&
806 isOpenMPTargetExecutionDirective(D
.getDirectiveKind());
807 bool FirstprivateIsLastprivate
= false;
808 llvm::DenseMap
<const VarDecl
*, OpenMPLastprivateModifier
> Lastprivates
;
809 for (const auto *C
: D
.getClausesOfKind
<OMPLastprivateClause
>()) {
810 for (const auto *D
: C
->varlists())
811 Lastprivates
.try_emplace(
812 cast
<VarDecl
>(cast
<DeclRefExpr
>(D
)->getDecl())->getCanonicalDecl(),
815 llvm::DenseSet
<const VarDecl
*> EmittedAsFirstprivate
;
816 llvm::SmallVector
<OpenMPDirectiveKind
, 4> CaptureRegions
;
817 getOpenMPCaptureRegions(CaptureRegions
, D
.getDirectiveKind());
818 // Force emission of the firstprivate copy if the directive does not emit
819 // outlined function, like omp for, omp simd, omp distribute etc.
820 bool MustEmitFirstprivateCopy
=
821 CaptureRegions
.size() == 1 && CaptureRegions
.back() == OMPD_unknown
;
822 for (const auto *C
: D
.getClausesOfKind
<OMPFirstprivateClause
>()) {
823 const auto *IRef
= C
->varlist_begin();
824 const auto *InitsRef
= C
->inits().begin();
825 for (const Expr
*IInit
: C
->private_copies()) {
826 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
827 bool ThisFirstprivateIsLastprivate
=
828 Lastprivates
.count(OrigVD
->getCanonicalDecl()) > 0;
829 const FieldDecl
*FD
= CapturedStmtInfo
->lookup(OrigVD
);
830 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IInit
)->getDecl());
831 if (!MustEmitFirstprivateCopy
&& !ThisFirstprivateIsLastprivate
&& FD
&&
832 !FD
->getType()->isReferenceType() &&
833 (!VD
|| !VD
->hasAttr
<OMPAllocateDeclAttr
>())) {
834 EmittedAsFirstprivate
.insert(OrigVD
->getCanonicalDecl());
839 // Do not emit copy for firstprivate constant variables in target regions,
840 // captured by reference.
841 if (DeviceConstTarget
&& OrigVD
->getType().isConstant(getContext()) &&
842 FD
&& FD
->getType()->isReferenceType() &&
843 (!VD
|| !VD
->hasAttr
<OMPAllocateDeclAttr
>())) {
844 EmittedAsFirstprivate
.insert(OrigVD
->getCanonicalDecl());
849 FirstprivateIsLastprivate
=
850 FirstprivateIsLastprivate
|| ThisFirstprivateIsLastprivate
;
851 if (EmittedAsFirstprivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
853 cast
<VarDecl
>(cast
<DeclRefExpr
>(*InitsRef
)->getDecl());
855 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
856 /*RefersToEnclosingVariableOrCapture=*/FD
!= nullptr,
857 (*IRef
)->getType(), VK_LValue
, (*IRef
)->getExprLoc());
860 // Check if the firstprivate variable is just a constant value.
861 ConstantEmission CE
= tryEmitAsConstant(&DRE
);
862 if (CE
&& !CE
.isReference()) {
863 // Constant value, no need to create a copy.
868 if (CE
&& CE
.isReference()) {
869 OriginalLVal
= CE
.getReferenceLValue(*this, &DRE
);
871 assert(!CE
&& "Expected non-constant firstprivate.");
872 OriginalLVal
= EmitLValue(&DRE
);
875 OriginalLVal
= EmitLValue(&DRE
);
877 QualType Type
= VD
->getType();
878 if (Type
->isArrayType()) {
879 // Emit VarDecl with copy init for arrays.
880 // Get the address of the original variable captured in current
882 AutoVarEmission Emission
= EmitAutoVarAlloca(*VD
);
883 const Expr
*Init
= VD
->getInit();
884 if (!isa
<CXXConstructExpr
>(Init
) || isTrivialInitializer(Init
)) {
885 // Perform simple memcpy.
886 LValue Dest
= MakeAddrLValue(Emission
.getAllocatedAddress(), Type
);
887 EmitAggregateAssign(Dest
, OriginalLVal
, Type
);
889 EmitOMPAggregateAssign(
890 Emission
.getAllocatedAddress(), OriginalLVal
.getAddress(*this),
892 [this, VDInit
, Init
](Address DestElement
, Address SrcElement
) {
893 // Clean up any temporaries needed by the
895 RunCleanupsScope
InitScope(*this);
896 // Emit initialization for single element.
897 setAddrOfLocalVar(VDInit
, SrcElement
);
898 EmitAnyExprToMem(Init
, DestElement
,
899 Init
->getType().getQualifiers(),
900 /*IsInitializer*/ false);
901 LocalDeclMap
.erase(VDInit
);
904 EmitAutoVarCleanups(Emission
);
906 PrivateScope
.addPrivate(OrigVD
, Emission
.getAllocatedAddress());
908 Address OriginalAddr
= OriginalLVal
.getAddress(*this);
909 // Emit private VarDecl with copy init.
910 // Remap temp VDInit variable to the address of the original
911 // variable (for proper handling of captured global variables).
912 setAddrOfLocalVar(VDInit
, OriginalAddr
);
914 LocalDeclMap
.erase(VDInit
);
915 Address VDAddr
= GetAddrOfLocalVar(VD
);
916 if (ThisFirstprivateIsLastprivate
&&
917 Lastprivates
[OrigVD
->getCanonicalDecl()] ==
918 OMPC_LASTPRIVATE_conditional
) {
919 // Create/init special variable for lastprivate conditionals.
921 EmitLoadOfScalar(MakeAddrLValue(VDAddr
, (*IRef
)->getType(),
922 AlignmentSource::Decl
),
923 (*IRef
)->getExprLoc());
924 VDAddr
= CGM
.getOpenMPRuntime().emitLastprivateConditionalInit(
926 EmitStoreOfScalar(V
, MakeAddrLValue(VDAddr
, (*IRef
)->getType(),
927 AlignmentSource::Decl
));
928 LocalDeclMap
.erase(VD
);
929 setAddrOfLocalVar(VD
, VDAddr
);
931 IsRegistered
= PrivateScope
.addPrivate(OrigVD
, VDAddr
);
933 assert(IsRegistered
&&
934 "firstprivate var already registered as private");
935 // Silence the warning about unused variable.
942 return FirstprivateIsLastprivate
&& !EmittedAsFirstprivate
.empty();
945 void CodeGenFunction::EmitOMPPrivateClause(
946 const OMPExecutableDirective
&D
,
947 CodeGenFunction::OMPPrivateScope
&PrivateScope
) {
948 if (!HaveInsertPoint())
950 llvm::DenseSet
<const VarDecl
*> EmittedAsPrivate
;
951 for (const auto *C
: D
.getClausesOfKind
<OMPPrivateClause
>()) {
952 auto IRef
= C
->varlist_begin();
953 for (const Expr
*IInit
: C
->private_copies()) {
954 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
955 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
956 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IInit
)->getDecl());
958 // Emit private VarDecl with copy init.
960 PrivateScope
.addPrivate(OrigVD
, GetAddrOfLocalVar(VD
));
961 assert(IsRegistered
&& "private var already registered as private");
962 // Silence the warning about unused variable.
970 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective
&D
) {
971 if (!HaveInsertPoint())
973 // threadprivate_var1 = master_threadprivate_var1;
974 // operator=(threadprivate_var2, master_threadprivate_var2);
976 // __kmpc_barrier(&loc, global_tid);
977 llvm::DenseSet
<const VarDecl
*> CopiedVars
;
978 llvm::BasicBlock
*CopyBegin
= nullptr, *CopyEnd
= nullptr;
979 for (const auto *C
: D
.getClausesOfKind
<OMPCopyinClause
>()) {
980 auto IRef
= C
->varlist_begin();
981 auto ISrcRef
= C
->source_exprs().begin();
982 auto IDestRef
= C
->destination_exprs().begin();
983 for (const Expr
*AssignOp
: C
->assignment_ops()) {
984 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
985 QualType Type
= VD
->getType();
986 if (CopiedVars
.insert(VD
->getCanonicalDecl()).second
) {
987 // Get the address of the master variable. If we are emitting code with
988 // TLS support, the address is passed from the master as field in the
989 // captured declaration.
990 Address MasterAddr
= Address::invalid();
991 if (getLangOpts().OpenMPUseTLS
&&
992 getContext().getTargetInfo().isTLSSupported()) {
993 assert(CapturedStmtInfo
->lookup(VD
) &&
994 "Copyin threadprivates should have been captured!");
995 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(VD
), true,
996 (*IRef
)->getType(), VK_LValue
, (*IRef
)->getExprLoc());
997 MasterAddr
= EmitLValue(&DRE
).getAddress(*this);
998 LocalDeclMap
.erase(VD
);
1001 Address(VD
->isStaticLocal() ? CGM
.getStaticLocalDeclAddress(VD
)
1002 : CGM
.GetAddrOfGlobal(VD
),
1003 CGM
.getTypes().ConvertTypeForMem(VD
->getType()),
1004 getContext().getDeclAlign(VD
));
1006 // Get the address of the threadprivate variable.
1007 Address PrivateAddr
= EmitLValue(*IRef
).getAddress(*this);
1008 if (CopiedVars
.size() == 1) {
1009 // At first check if current thread is a master thread. If it is, no
1010 // need to copy data.
1011 CopyBegin
= createBasicBlock("copyin.not.master");
1012 CopyEnd
= createBasicBlock("copyin.not.master.end");
1013 // TODO: Avoid ptrtoint conversion.
1014 auto *MasterAddrInt
=
1015 Builder
.CreatePtrToInt(MasterAddr
.getPointer(), CGM
.IntPtrTy
);
1016 auto *PrivateAddrInt
=
1017 Builder
.CreatePtrToInt(PrivateAddr
.getPointer(), CGM
.IntPtrTy
);
1018 Builder
.CreateCondBr(
1019 Builder
.CreateICmpNE(MasterAddrInt
, PrivateAddrInt
), CopyBegin
,
1021 EmitBlock(CopyBegin
);
1024 cast
<VarDecl
>(cast
<DeclRefExpr
>(*ISrcRef
)->getDecl());
1025 const auto *DestVD
=
1026 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IDestRef
)->getDecl());
1027 EmitOMPCopy(Type
, PrivateAddr
, MasterAddr
, DestVD
, SrcVD
, AssignOp
);
1035 // Exit out of copying procedure for non-master thread.
1036 EmitBlock(CopyEnd
, /*IsFinished=*/true);
1042 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1043 const OMPExecutableDirective
&D
, OMPPrivateScope
&PrivateScope
) {
1044 if (!HaveInsertPoint())
1046 bool HasAtLeastOneLastprivate
= false;
1047 llvm::DenseSet
<const VarDecl
*> SIMDLCVs
;
1048 if (isOpenMPSimdDirective(D
.getDirectiveKind())) {
1049 const auto *LoopDirective
= cast
<OMPLoopDirective
>(&D
);
1050 for (const Expr
*C
: LoopDirective
->counters()) {
1052 cast
<VarDecl
>(cast
<DeclRefExpr
>(C
)->getDecl())->getCanonicalDecl());
1055 llvm::DenseSet
<const VarDecl
*> AlreadyEmittedVars
;
1056 for (const auto *C
: D
.getClausesOfKind
<OMPLastprivateClause
>()) {
1057 HasAtLeastOneLastprivate
= true;
1058 if (isOpenMPTaskLoopDirective(D
.getDirectiveKind()) &&
1059 !getLangOpts().OpenMPSimd
)
1061 const auto *IRef
= C
->varlist_begin();
1062 const auto *IDestRef
= C
->destination_exprs().begin();
1063 for (const Expr
*IInit
: C
->private_copies()) {
1064 // Keep the address of the original variable for future update at the end
1066 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
1067 // Taskloops do not require additional initialization, it is done in
1068 // runtime support library.
1069 if (AlreadyEmittedVars
.insert(OrigVD
->getCanonicalDecl()).second
) {
1070 const auto *DestVD
=
1071 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IDestRef
)->getDecl());
1072 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
1073 /*RefersToEnclosingVariableOrCapture=*/
1074 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
1075 (*IRef
)->getType(), VK_LValue
, (*IRef
)->getExprLoc());
1076 PrivateScope
.addPrivate(DestVD
, EmitLValue(&DRE
).getAddress(*this));
1077 // Check if the variable is also a firstprivate: in this case IInit is
1078 // not generated. Initialization of this variable will happen in codegen
1079 // for 'firstprivate' clause.
1080 if (IInit
&& !SIMDLCVs
.count(OrigVD
->getCanonicalDecl())) {
1081 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IInit
)->getDecl());
1082 Address VDAddr
= Address::invalid();
1083 if (C
->getKind() == OMPC_LASTPRIVATE_conditional
) {
1084 VDAddr
= CGM
.getOpenMPRuntime().emitLastprivateConditionalInit(
1086 setAddrOfLocalVar(VD
, VDAddr
);
1088 // Emit private VarDecl with copy init.
1090 VDAddr
= GetAddrOfLocalVar(VD
);
1092 bool IsRegistered
= PrivateScope
.addPrivate(OrigVD
, VDAddr
);
1093 assert(IsRegistered
&&
1094 "lastprivate var already registered as private");
1102 return HasAtLeastOneLastprivate
;
1105 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1106 const OMPExecutableDirective
&D
, bool NoFinals
,
1107 llvm::Value
*IsLastIterCond
) {
1108 if (!HaveInsertPoint())
1110 // Emit following code:
1111 // if (<IsLastIterCond>) {
1112 // orig_var1 = private_orig_var1;
1114 // orig_varn = private_orig_varn;
1116 llvm::BasicBlock
*ThenBB
= nullptr;
1117 llvm::BasicBlock
*DoneBB
= nullptr;
1118 if (IsLastIterCond
) {
1119 // Emit implicit barrier if at least one lastprivate conditional is found
1120 // and this is not a simd mode.
1121 if (!getLangOpts().OpenMPSimd
&&
1122 llvm::any_of(D
.getClausesOfKind
<OMPLastprivateClause
>(),
1123 [](const OMPLastprivateClause
*C
) {
1124 return C
->getKind() == OMPC_LASTPRIVATE_conditional
;
1126 CGM
.getOpenMPRuntime().emitBarrierCall(*this, D
.getBeginLoc(),
1128 /*EmitChecks=*/false,
1129 /*ForceSimpleCall=*/true);
1131 ThenBB
= createBasicBlock(".omp.lastprivate.then");
1132 DoneBB
= createBasicBlock(".omp.lastprivate.done");
1133 Builder
.CreateCondBr(IsLastIterCond
, ThenBB
, DoneBB
);
1136 llvm::DenseSet
<const VarDecl
*> AlreadyEmittedVars
;
1137 llvm::DenseMap
<const VarDecl
*, const Expr
*> LoopCountersAndUpdates
;
1138 if (const auto *LoopDirective
= dyn_cast
<OMPLoopDirective
>(&D
)) {
1139 auto IC
= LoopDirective
->counters().begin();
1140 for (const Expr
*F
: LoopDirective
->finals()) {
1142 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IC
)->getDecl())->getCanonicalDecl();
1144 AlreadyEmittedVars
.insert(D
);
1146 LoopCountersAndUpdates
[D
] = F
;
1150 for (const auto *C
: D
.getClausesOfKind
<OMPLastprivateClause
>()) {
1151 auto IRef
= C
->varlist_begin();
1152 auto ISrcRef
= C
->source_exprs().begin();
1153 auto IDestRef
= C
->destination_exprs().begin();
1154 for (const Expr
*AssignOp
: C
->assignment_ops()) {
1155 const auto *PrivateVD
=
1156 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
1157 QualType Type
= PrivateVD
->getType();
1158 const auto *CanonicalVD
= PrivateVD
->getCanonicalDecl();
1159 if (AlreadyEmittedVars
.insert(CanonicalVD
).second
) {
1160 // If lastprivate variable is a loop control variable for loop-based
1161 // directive, update its value before copyin back to original
1163 if (const Expr
*FinalExpr
= LoopCountersAndUpdates
.lookup(CanonicalVD
))
1164 EmitIgnoredExpr(FinalExpr
);
1166 cast
<VarDecl
>(cast
<DeclRefExpr
>(*ISrcRef
)->getDecl());
1167 const auto *DestVD
=
1168 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IDestRef
)->getDecl());
1169 // Get the address of the private variable.
1170 Address PrivateAddr
= GetAddrOfLocalVar(PrivateVD
);
1171 if (const auto *RefTy
= PrivateVD
->getType()->getAs
<ReferenceType
>())
1172 PrivateAddr
= Address(
1173 Builder
.CreateLoad(PrivateAddr
),
1174 CGM
.getTypes().ConvertTypeForMem(RefTy
->getPointeeType()),
1175 CGM
.getNaturalTypeAlignment(RefTy
->getPointeeType()));
1176 // Store the last value to the private copy in the last iteration.
1177 if (C
->getKind() == OMPC_LASTPRIVATE_conditional
)
1178 CGM
.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1179 *this, MakeAddrLValue(PrivateAddr
, (*IRef
)->getType()), PrivateVD
,
1180 (*IRef
)->getExprLoc());
1181 // Get the address of the original variable.
1182 Address OriginalAddr
= GetAddrOfLocalVar(DestVD
);
1183 EmitOMPCopy(Type
, OriginalAddr
, PrivateAddr
, DestVD
, SrcVD
, AssignOp
);
1189 if (const Expr
*PostUpdate
= C
->getPostUpdateExpr())
1190 EmitIgnoredExpr(PostUpdate
);
1193 EmitBlock(DoneBB
, /*IsFinished=*/true);
1196 void CodeGenFunction::EmitOMPReductionClauseInit(
1197 const OMPExecutableDirective
&D
,
1198 CodeGenFunction::OMPPrivateScope
&PrivateScope
, bool ForInscan
) {
1199 if (!HaveInsertPoint())
1201 SmallVector
<const Expr
*, 4> Shareds
;
1202 SmallVector
<const Expr
*, 4> Privates
;
1203 SmallVector
<const Expr
*, 4> ReductionOps
;
1204 SmallVector
<const Expr
*, 4> LHSs
;
1205 SmallVector
<const Expr
*, 4> RHSs
;
1207 SmallVector
<const Expr
*, 4> TaskLHSs
;
1208 SmallVector
<const Expr
*, 4> TaskRHSs
;
1209 for (const auto *C
: D
.getClausesOfKind
<OMPReductionClause
>()) {
1210 if (ForInscan
!= (C
->getModifier() == OMPC_REDUCTION_inscan
))
1212 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
1213 Privates
.append(C
->privates().begin(), C
->privates().end());
1214 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
1215 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
1216 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
1217 if (C
->getModifier() == OMPC_REDUCTION_task
) {
1218 Data
.ReductionVars
.append(C
->privates().begin(), C
->privates().end());
1219 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
1220 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
1221 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
1222 C
->reduction_ops().end());
1223 TaskLHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
1224 TaskRHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
1227 ReductionCodeGen
RedCG(Shareds
, Shareds
, Privates
, ReductionOps
);
1229 auto *ILHS
= LHSs
.begin();
1230 auto *IRHS
= RHSs
.begin();
1231 auto *IPriv
= Privates
.begin();
1232 for (const Expr
*IRef
: Shareds
) {
1233 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IPriv
)->getDecl());
1234 // Emit private VarDecl with reduction init.
1235 RedCG
.emitSharedOrigLValue(*this, Count
);
1236 RedCG
.emitAggregateType(*this, Count
);
1237 AutoVarEmission Emission
= EmitAutoVarAlloca(*PrivateVD
);
1238 RedCG
.emitInitialization(*this, Count
, Emission
.getAllocatedAddress(),
1239 RedCG
.getSharedLValue(Count
).getAddress(*this),
1240 [&Emission
](CodeGenFunction
&CGF
) {
1241 CGF
.EmitAutoVarInit(Emission
);
1244 EmitAutoVarCleanups(Emission
);
1245 Address BaseAddr
= RedCG
.adjustPrivateAddress(
1246 *this, Count
, Emission
.getAllocatedAddress());
1248 PrivateScope
.addPrivate(RedCG
.getBaseDecl(Count
), BaseAddr
);
1249 assert(IsRegistered
&& "private var already registered as private");
1250 // Silence the warning about unused variable.
1253 const auto *LHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*ILHS
)->getDecl());
1254 const auto *RHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRHS
)->getDecl());
1255 QualType Type
= PrivateVD
->getType();
1256 bool isaOMPArraySectionExpr
= isa
<OMPArraySectionExpr
>(IRef
);
1257 if (isaOMPArraySectionExpr
&& Type
->isVariablyModifiedType()) {
1258 // Store the address of the original variable associated with the LHS
1259 // implicit variable.
1260 PrivateScope
.addPrivate(LHSVD
,
1261 RedCG
.getSharedLValue(Count
).getAddress(*this));
1262 PrivateScope
.addPrivate(RHSVD
, GetAddrOfLocalVar(PrivateVD
));
1263 } else if ((isaOMPArraySectionExpr
&& Type
->isScalarType()) ||
1264 isa
<ArraySubscriptExpr
>(IRef
)) {
1265 // Store the address of the original variable associated with the LHS
1266 // implicit variable.
1267 PrivateScope
.addPrivate(LHSVD
,
1268 RedCG
.getSharedLValue(Count
).getAddress(*this));
1269 PrivateScope
.addPrivate(RHSVD
, Builder
.CreateElementBitCast(
1270 GetAddrOfLocalVar(PrivateVD
),
1271 ConvertTypeForMem(RHSVD
->getType()),
1274 QualType Type
= PrivateVD
->getType();
1275 bool IsArray
= getContext().getAsArrayType(Type
) != nullptr;
1276 Address OriginalAddr
= RedCG
.getSharedLValue(Count
).getAddress(*this);
1277 // Store the address of the original variable associated with the LHS
1278 // implicit variable.
1280 OriginalAddr
= Builder
.CreateElementBitCast(
1281 OriginalAddr
, ConvertTypeForMem(LHSVD
->getType()), "lhs.begin");
1283 PrivateScope
.addPrivate(LHSVD
, OriginalAddr
);
1284 PrivateScope
.addPrivate(
1285 RHSVD
, IsArray
? Builder
.CreateElementBitCast(
1286 GetAddrOfLocalVar(PrivateVD
),
1287 ConvertTypeForMem(RHSVD
->getType()), "rhs.begin")
1288 : GetAddrOfLocalVar(PrivateVD
));
1295 if (!Data
.ReductionVars
.empty()) {
1296 Data
.IsReductionWithTaskMod
= true;
1297 Data
.IsWorksharingReduction
=
1298 isOpenMPWorksharingDirective(D
.getDirectiveKind());
1299 llvm::Value
*ReductionDesc
= CGM
.getOpenMPRuntime().emitTaskReductionInit(
1300 *this, D
.getBeginLoc(), TaskLHSs
, TaskRHSs
, Data
);
1301 const Expr
*TaskRedRef
= nullptr;
1302 switch (D
.getDirectiveKind()) {
1304 TaskRedRef
= cast
<OMPParallelDirective
>(D
).getTaskReductionRefExpr();
1307 TaskRedRef
= cast
<OMPForDirective
>(D
).getTaskReductionRefExpr();
1310 TaskRedRef
= cast
<OMPSectionsDirective
>(D
).getTaskReductionRefExpr();
1312 case OMPD_parallel_for
:
1313 TaskRedRef
= cast
<OMPParallelForDirective
>(D
).getTaskReductionRefExpr();
1315 case OMPD_parallel_master
:
1317 cast
<OMPParallelMasterDirective
>(D
).getTaskReductionRefExpr();
1319 case OMPD_parallel_sections
:
1321 cast
<OMPParallelSectionsDirective
>(D
).getTaskReductionRefExpr();
1323 case OMPD_target_parallel
:
1325 cast
<OMPTargetParallelDirective
>(D
).getTaskReductionRefExpr();
1327 case OMPD_target_parallel_for
:
1329 cast
<OMPTargetParallelForDirective
>(D
).getTaskReductionRefExpr();
1331 case OMPD_distribute_parallel_for
:
1333 cast
<OMPDistributeParallelForDirective
>(D
).getTaskReductionRefExpr();
1335 case OMPD_teams_distribute_parallel_for
:
1336 TaskRedRef
= cast
<OMPTeamsDistributeParallelForDirective
>(D
)
1337 .getTaskReductionRefExpr();
1339 case OMPD_target_teams_distribute_parallel_for
:
1340 TaskRedRef
= cast
<OMPTargetTeamsDistributeParallelForDirective
>(D
)
1341 .getTaskReductionRefExpr();
1349 case OMPD_parallel_for_simd
:
1351 case OMPD_taskyield
:
1355 case OMPD_taskgroup
:
1363 case OMPD_cancellation_point
:
1365 case OMPD_target_data
:
1366 case OMPD_target_enter_data
:
1367 case OMPD_target_exit_data
:
1369 case OMPD_taskloop_simd
:
1370 case OMPD_master_taskloop
:
1371 case OMPD_master_taskloop_simd
:
1372 case OMPD_parallel_master_taskloop
:
1373 case OMPD_parallel_master_taskloop_simd
:
1374 case OMPD_distribute
:
1375 case OMPD_target_update
:
1376 case OMPD_distribute_parallel_for_simd
:
1377 case OMPD_distribute_simd
:
1378 case OMPD_target_parallel_for_simd
:
1379 case OMPD_target_simd
:
1380 case OMPD_teams_distribute
:
1381 case OMPD_teams_distribute_simd
:
1382 case OMPD_teams_distribute_parallel_for_simd
:
1383 case OMPD_target_teams
:
1384 case OMPD_target_teams_distribute
:
1385 case OMPD_target_teams_distribute_parallel_for_simd
:
1386 case OMPD_target_teams_distribute_simd
:
1387 case OMPD_declare_target
:
1388 case OMPD_end_declare_target
:
1389 case OMPD_threadprivate
:
1391 case OMPD_declare_reduction
:
1392 case OMPD_declare_mapper
:
1393 case OMPD_declare_simd
:
1395 case OMPD_declare_variant
:
1396 case OMPD_begin_declare_variant
:
1397 case OMPD_end_declare_variant
:
1400 llvm_unreachable("Enexpected directive with task reductions.");
1403 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(TaskRedRef
)->getDecl());
1405 EmitStoreOfScalar(ReductionDesc
, GetAddrOfLocalVar(VD
),
1406 /*Volatile=*/false, TaskRedRef
->getType());
1410 void CodeGenFunction::EmitOMPReductionClauseFinal(
1411 const OMPExecutableDirective
&D
, const OpenMPDirectiveKind ReductionKind
) {
1412 if (!HaveInsertPoint())
1414 llvm::SmallVector
<const Expr
*, 8> Privates
;
1415 llvm::SmallVector
<const Expr
*, 8> LHSExprs
;
1416 llvm::SmallVector
<const Expr
*, 8> RHSExprs
;
1417 llvm::SmallVector
<const Expr
*, 8> ReductionOps
;
1418 bool HasAtLeastOneReduction
= false;
1419 bool IsReductionWithTaskMod
= false;
1420 for (const auto *C
: D
.getClausesOfKind
<OMPReductionClause
>()) {
1421 // Do not emit for inscan reductions.
1422 if (C
->getModifier() == OMPC_REDUCTION_inscan
)
1424 HasAtLeastOneReduction
= true;
1425 Privates
.append(C
->privates().begin(), C
->privates().end());
1426 LHSExprs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
1427 RHSExprs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
1428 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
1429 IsReductionWithTaskMod
=
1430 IsReductionWithTaskMod
|| C
->getModifier() == OMPC_REDUCTION_task
;
1432 if (HasAtLeastOneReduction
) {
1433 if (IsReductionWithTaskMod
) {
1434 CGM
.getOpenMPRuntime().emitTaskReductionFini(
1435 *this, D
.getBeginLoc(),
1436 isOpenMPWorksharingDirective(D
.getDirectiveKind()));
1438 bool WithNowait
= D
.getSingleClause
<OMPNowaitClause
>() ||
1439 isOpenMPParallelDirective(D
.getDirectiveKind()) ||
1440 ReductionKind
== OMPD_simd
;
1441 bool SimpleReduction
= ReductionKind
== OMPD_simd
;
1442 // Emit nowait reduction if nowait clause is present or directive is a
1443 // parallel directive (it always has implicit barrier).
1444 CGM
.getOpenMPRuntime().emitReduction(
1445 *this, D
.getEndLoc(), Privates
, LHSExprs
, RHSExprs
, ReductionOps
,
1446 {WithNowait
, SimpleReduction
, ReductionKind
});
1450 static void emitPostUpdateForReductionClause(
1451 CodeGenFunction
&CGF
, const OMPExecutableDirective
&D
,
1452 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
1453 if (!CGF
.HaveInsertPoint())
1455 llvm::BasicBlock
*DoneBB
= nullptr;
1456 for (const auto *C
: D
.getClausesOfKind
<OMPReductionClause
>()) {
1457 if (const Expr
*PostUpdate
= C
->getPostUpdateExpr()) {
1459 if (llvm::Value
*Cond
= CondGen(CGF
)) {
1460 // If the first post-update expression is found, emit conditional
1461 // block if it was requested.
1462 llvm::BasicBlock
*ThenBB
= CGF
.createBasicBlock(".omp.reduction.pu");
1463 DoneBB
= CGF
.createBasicBlock(".omp.reduction.pu.done");
1464 CGF
.Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
1465 CGF
.EmitBlock(ThenBB
);
1468 CGF
.EmitIgnoredExpr(PostUpdate
);
1472 CGF
.EmitBlock(DoneBB
, /*IsFinished=*/true);
1476 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1477 /// parallel function. This is necessary for combined constructs such as
1478 /// 'distribute parallel for'
1479 typedef llvm::function_ref
<void(CodeGenFunction
&,
1480 const OMPExecutableDirective
&,
1481 llvm::SmallVectorImpl
<llvm::Value
*> &)>
1482 CodeGenBoundParametersTy
;
1483 } // anonymous namespace
1486 checkForLastprivateConditionalUpdate(CodeGenFunction
&CGF
,
1487 const OMPExecutableDirective
&S
) {
1488 if (CGF
.getLangOpts().OpenMP
< 50)
1490 llvm::DenseSet
<CanonicalDeclPtr
<const VarDecl
>> PrivateDecls
;
1491 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
1492 for (const Expr
*Ref
: C
->varlists()) {
1493 if (!Ref
->getType()->isScalarType())
1495 const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
1498 PrivateDecls
.insert(cast
<VarDecl
>(DRE
->getDecl()));
1499 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, Ref
);
1502 for (const auto *C
: S
.getClausesOfKind
<OMPLastprivateClause
>()) {
1503 for (const Expr
*Ref
: C
->varlists()) {
1504 if (!Ref
->getType()->isScalarType())
1506 const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
1509 PrivateDecls
.insert(cast
<VarDecl
>(DRE
->getDecl()));
1510 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, Ref
);
1513 for (const auto *C
: S
.getClausesOfKind
<OMPLinearClause
>()) {
1514 for (const Expr
*Ref
: C
->varlists()) {
1515 if (!Ref
->getType()->isScalarType())
1517 const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
1520 PrivateDecls
.insert(cast
<VarDecl
>(DRE
->getDecl()));
1521 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, Ref
);
1524 // Privates should ne analyzed since they are not captured at all.
1525 // Task reductions may be skipped - tasks are ignored.
1526 // Firstprivates do not return value but may be passed by reference - no need
1527 // to check for updated lastprivate conditional.
1528 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
1529 for (const Expr
*Ref
: C
->varlists()) {
1530 if (!Ref
->getType()->isScalarType())
1532 const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
1535 PrivateDecls
.insert(cast
<VarDecl
>(DRE
->getDecl()));
1538 CGF
.CGM
.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1539 CGF
, S
, PrivateDecls
);
1542 static void emitCommonOMPParallelDirective(
1543 CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
,
1544 OpenMPDirectiveKind InnermostKind
, const RegionCodeGenTy
&CodeGen
,
1545 const CodeGenBoundParametersTy
&CodeGenBoundParameters
) {
1546 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_parallel
);
1547 llvm::Value
*NumThreads
= nullptr;
1548 llvm::Function
*OutlinedFn
=
1549 CGF
.CGM
.getOpenMPRuntime().emitParallelOutlinedFunction(
1550 S
, *CS
->getCapturedDecl()->param_begin(), InnermostKind
, CodeGen
);
1551 if (const auto *NumThreadsClause
= S
.getSingleClause
<OMPNumThreadsClause
>()) {
1552 CodeGenFunction::RunCleanupsScope
NumThreadsScope(CGF
);
1553 NumThreads
= CGF
.EmitScalarExpr(NumThreadsClause
->getNumThreads(),
1554 /*IgnoreResultAssign=*/true);
1555 CGF
.CGM
.getOpenMPRuntime().emitNumThreadsClause(
1556 CGF
, NumThreads
, NumThreadsClause
->getBeginLoc());
1558 if (const auto *ProcBindClause
= S
.getSingleClause
<OMPProcBindClause
>()) {
1559 CodeGenFunction::RunCleanupsScope
ProcBindScope(CGF
);
1560 CGF
.CGM
.getOpenMPRuntime().emitProcBindClause(
1561 CGF
, ProcBindClause
->getProcBindKind(), ProcBindClause
->getBeginLoc());
1563 const Expr
*IfCond
= nullptr;
1564 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
1565 if (C
->getNameModifier() == OMPD_unknown
||
1566 C
->getNameModifier() == OMPD_parallel
) {
1567 IfCond
= C
->getCondition();
1572 OMPParallelScope
Scope(CGF
, S
);
1573 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
1574 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1575 // lower and upper bounds with the pragma 'for' chunking mechanism.
1576 // The following lambda takes care of appending the lower and upper bound
1577 // parameters when necessary
1578 CodeGenBoundParameters(CGF
, S
, CapturedVars
);
1579 CGF
.GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
1580 CGF
.CGM
.getOpenMPRuntime().emitParallelCall(CGF
, S
.getBeginLoc(), OutlinedFn
,
1581 CapturedVars
, IfCond
, NumThreads
);
1584 static bool isAllocatableDecl(const VarDecl
*VD
) {
1585 const VarDecl
*CVD
= VD
->getCanonicalDecl();
1586 if (!CVD
->hasAttr
<OMPAllocateDeclAttr
>())
1588 const auto *AA
= CVD
->getAttr
<OMPAllocateDeclAttr
>();
1589 // Use the default allocation.
1590 return !((AA
->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc
||
1591 AA
->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc
) &&
1592 !AA
->getAllocator());
1595 static void emitEmptyBoundParameters(CodeGenFunction
&,
1596 const OMPExecutableDirective
&,
1597 llvm::SmallVectorImpl
<llvm::Value
*> &) {}
1599 static void emitOMPCopyinClause(CodeGenFunction
&CGF
,
1600 const OMPExecutableDirective
&S
) {
1601 bool Copyins
= CGF
.EmitOMPCopyinClause(S
);
1603 // Emit implicit barrier to synchronize threads and avoid data races on
1604 // propagation master's thread values of threadprivate variables to local
1605 // instances of that variables of all other implicit threads.
1606 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
1607 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
1608 /*ForceSimpleCall=*/true);
1612 Address
CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1613 CodeGenFunction
&CGF
, const VarDecl
*VD
) {
1614 CodeGenModule
&CGM
= CGF
.CGM
;
1615 auto &OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
1618 return Address::invalid();
1619 const VarDecl
*CVD
= VD
->getCanonicalDecl();
1620 if (!isAllocatableDecl(CVD
))
1621 return Address::invalid();
1623 CharUnits Align
= CGM
.getContext().getDeclAlign(CVD
);
1624 if (CVD
->getType()->isVariablyModifiedType()) {
1625 Size
= CGF
.getTypeSize(CVD
->getType());
1626 // Align the size: ((size + align - 1) / align) * align
1627 Size
= CGF
.Builder
.CreateNUWAdd(
1628 Size
, CGM
.getSize(Align
- CharUnits::fromQuantity(1)));
1629 Size
= CGF
.Builder
.CreateUDiv(Size
, CGM
.getSize(Align
));
1630 Size
= CGF
.Builder
.CreateNUWMul(Size
, CGM
.getSize(Align
));
1632 CharUnits Sz
= CGM
.getContext().getTypeSizeInChars(CVD
->getType());
1633 Size
= CGM
.getSize(Sz
.alignTo(Align
));
1636 const auto *AA
= CVD
->getAttr
<OMPAllocateDeclAttr
>();
1637 assert(AA
->getAllocator() &&
1638 "Expected allocator expression for non-default allocator.");
1639 llvm::Value
*Allocator
= CGF
.EmitScalarExpr(AA
->getAllocator());
1640 // According to the standard, the original allocator type is a enum (integer).
1641 // Convert to pointer type, if required.
1642 if (Allocator
->getType()->isIntegerTy())
1643 Allocator
= CGF
.Builder
.CreateIntToPtr(Allocator
, CGM
.VoidPtrTy
);
1644 else if (Allocator
->getType()->isPointerTy())
1645 Allocator
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(Allocator
,
1648 llvm::Value
*Addr
= OMPBuilder
.createOMPAlloc(
1649 CGF
.Builder
, Size
, Allocator
,
1650 getNameWithSeparators({CVD
->getName(), ".void.addr"}, ".", "."));
1651 llvm::CallInst
*FreeCI
=
1652 OMPBuilder
.createOMPFree(CGF
.Builder
, Addr
, Allocator
);
1654 CGF
.EHStack
.pushCleanup
<OMPAllocateCleanupTy
>(NormalAndEHCleanup
, FreeCI
);
1655 Addr
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
1657 CGF
.ConvertTypeForMem(CGM
.getContext().getPointerType(CVD
->getType())),
1658 getNameWithSeparators({CVD
->getName(), ".addr"}, ".", "."));
1659 return Address(Addr
, CGF
.ConvertTypeForMem(CVD
->getType()), Align
);
/// Produce an address for the threadprivate variable \p VD via the
/// OpenMPIRBuilder.
///
/// The visible code first tests whether TLS-based threadprivate handling is
/// requested (LangOpts.OpenMPUseTLS) and supported by the target. It then
/// computes the store size of \p VDAddr's element type, forms a cache name
/// from the mangled name of \p VD plus a "cache" suffix, and calls
/// OpenMPIRBuilder::createCachedThreadPrivate.
///
/// \returns an Address wrapping the createCachedThreadPrivate call, with
/// element type Int8Ty and the alignment of \p VDAddr.
1662 Address
CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1663 CodeGenFunction
&CGF
, const VarDecl
*VD
, Address VDAddr
,
1664 SourceLocation Loc
) {
1665 CodeGenModule
&CGM
= CGF
.CGM
;
1666 if (CGM
.getLangOpts().OpenMPUseTLS
&&
1667 CGM
.getContext().getTargetInfo().isTLSSupported())
1670 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
// The element type of the variable's address determines the size passed to
// the runtime call below.
1672 llvm::Type
*VarTy
= VDAddr
.getElementType();
1674 CGF
.Builder
.CreatePointerCast(VDAddr
.getPointer(), CGM
.Int8PtrTy
);
1675 llvm::ConstantInt
*Size
= CGM
.getSize(CGM
.GetTargetTypeStoreSize(VarTy
));
// Cache name = mangled name of VD followed by the "cache" suffix.
1676 std::string Suffix
= getNameWithSeparators({"cache", ""});
1677 llvm::Twine CacheName
= Twine(CGM
.getMangledName(VD
)).concat(Suffix
);
1679 llvm::CallInst
*ThreadPrivateCacheCall
=
1680 OMPBuilder
.createCachedThreadPrivate(CGF
.Builder
, Data
, Size
, CacheName
);
1682 return Address(ThreadPrivateCacheCall
, CGM
.Int8Ty
, VDAddr
.getAlignment());
/// Build a std::string out of \p Parts.
///
/// The text is written through a raw_svector_ostream backed by a
/// SmallString<128>, and the current separator starts out as
/// \p FirstSeparator.
/// NOTE(review): presumably each part is preceded by the current separator and
/// \p Separator is used after the first part -- the loop body is not visible
/// here, confirm against the full source.
///
/// \returns a copy of the accumulated stream contents.
1685 std::string
CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1686 ArrayRef
<StringRef
> Parts
, StringRef FirstSeparator
, StringRef Separator
) {
1687 SmallString
<128> Buffer
;
1688 llvm::raw_svector_ostream
OS(Buffer
);
1689 StringRef Sep
= FirstSeparator
;
1690 for (StringRef Part
: Parts
) {
1694 return OS
.str().str();
/// Emit \p RegionBodyStmt at \p CodeGenIP as the body of an inlined region.
///
/// The builder is moved to \p CodeGenIP and the current block is split
/// (without creating a branch) into a block named ".<RegionName>.after".
/// The statement is emitted while an InlinedRegionBodyRAII built from
/// \p AllocaIP and that block is alive. If the builder still has an insertion
/// point afterwards, a branch to the ".after" block is created.
1697 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1698 CodeGenFunction
&CGF
, const Stmt
*RegionBodyStmt
, InsertPointTy AllocaIP
,
1699 InsertPointTy CodeGenIP
, Twine RegionName
) {
1700 CGBuilderTy
&Builder
= CGF
.Builder
;
1701 Builder
.restoreIP(CodeGenIP
);
1702 llvm::BasicBlock
*FiniBB
= splitBBWithSuffix(Builder
, /*CreateBranch=*/false,
1703 "." + RegionName
+ ".after");
1706 OMPBuilderCBHelpers::InlinedRegionBodyRAII
IRB(CGF
, AllocaIP
, *FiniBB
);
1707 CGF
.EmitStmt(RegionBodyStmt
);
// Only branch to the finalization block when an insertion point is still set.
1710 if (Builder
.saveIP().isSet())
1711 Builder
.CreateBr(FiniBB
);
/// Emit \p RegionBodyStmt at \p CodeGenIP as the body of an outlined region.
///
/// Same sequence as EmitOMPInlinedRegionBody, except that the statement is
/// emitted under an OutlinedRegionBodyRAII: restore the builder to
/// \p CodeGenIP, split off a ".<RegionName>.after" block without a branch,
/// emit the statement, then branch to the ".after" block if the builder still
/// has an insertion point.
1714 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1715 CodeGenFunction
&CGF
, const Stmt
*RegionBodyStmt
, InsertPointTy AllocaIP
,
1716 InsertPointTy CodeGenIP
, Twine RegionName
) {
1717 CGBuilderTy
&Builder
= CGF
.Builder
;
1718 Builder
.restoreIP(CodeGenIP
);
1719 llvm::BasicBlock
*FiniBB
= splitBBWithSuffix(Builder
, /*CreateBranch=*/false,
1720 "." + RegionName
+ ".after");
1723 OMPBuilderCBHelpers::OutlinedRegionBodyRAII
IRB(CGF
, AllocaIP
, *FiniBB
);
1724 CGF
.EmitStmt(RegionBodyStmt
);
// Only branch to the finalization block when an insertion point is still set.
1727 if (Builder
.saveIP().isSet())
1728 Builder
.CreateBr(FiniBB
);
/// Emit code for the OMPParallelDirective \p S.
///
/// When LangOpts.OpenMPIRBuilder is enabled, the 'if', 'num_threads' and
/// 'proc_bind' clauses are evaluated here and the region is created through
/// OpenMPIRBuilder::createParallel, using the finalization, privatization and
/// body-generation callbacks defined below. The code following that block
/// emits the region through emitCommonOMPParallelDirective with a codegen
/// callback that handles the copyin, firstprivate, private and reduction
/// clauses around the captured statement.
1731 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective
&S
) {
1732 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
1733 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
1734 // Check if we have any if clause associated with the directive.
1735 llvm::Value
*IfCond
= nullptr;
1736 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
1737 IfCond
= EmitScalarExpr(C
->getCondition(),
1738 /*IgnoreResultAssign=*/true);
// Evaluate the num_threads clause expression, if the clause is present.
1740 llvm::Value
*NumThreads
= nullptr;
1741 if (const auto *NumThreadsClause
= S
.getSingleClause
<OMPNumThreadsClause
>())
1742 NumThreads
= EmitScalarExpr(NumThreadsClause
->getNumThreads(),
1743 /*IgnoreResultAssign=*/true);
// Use the proc_bind kind from the clause, or OMP_PROC_BIND_default without one.
1745 ProcBindKind ProcBind
= OMP_PROC_BIND_default
;
1746 if (const auto *ProcBindClause
= S
.getSingleClause
<OMPProcBindClause
>())
1747 ProcBind
= ProcBindClause
->getProcBindKind();
1749 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
1751 // The cleanup callback that finalizes all variables at the given location,
1752 // thus calls destructors etc.
1753 auto FiniCB
= [this](InsertPointTy IP
) {
1754 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
1757 // Privatization callback that performs appropriate action for
1758 // shared/private/firstprivate/lastprivate/copyin/... variables.
1760 // TODO: This defaults to shared right now.
1761 auto PrivCB
= [](InsertPointTy AllocaIP
, InsertPointTy CodeGenIP
,
1762 llvm::Value
&, llvm::Value
&Val
, llvm::Value
*&ReplVal
) {
1763 // The next line is appropriate only for variables (Val) with the
1764 // data-sharing attribute "shared".
// The region body is the statement captured for the OMPD_parallel region.
1770 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_parallel
);
1771 const Stmt
*ParallelRegionBodyStmt
= CS
->getCapturedStmt();
1773 auto BodyGenCB
= [&, this](InsertPointTy AllocaIP
,
1774 InsertPointTy CodeGenIP
) {
1775 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1776 *this, ParallelRegionBodyStmt
, AllocaIP
, CodeGenIP
, "parallel");
// Set up captured-statement info for CS before invoking createParallel.
1779 CGCapturedStmtInfo
CGSI(*CS
, CR_OpenMP
);
1780 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(*this, &CGSI
);
// Allocas are placed at the function's current AllocaInsertPt.
1781 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
1782 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
1784 OMPBuilder
.createParallel(Builder
, AllocaIP
, BodyGenCB
, PrivCB
, FiniCB
,
1785 IfCond
, NumThreads
, ProcBind
, S
.hasCancel()));
1789 // Emit parallel region as a standalone region.
1790 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
1792 OMPPrivateScope
PrivateScope(CGF
);
1793 emitOMPCopyinClause(CGF
, S
);
1794 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
1795 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
1796 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
1797 (void)PrivateScope
.Privatize();
1798 CGF
.EmitStmt(S
.getCapturedStmt(OMPD_parallel
)->getCapturedStmt());
1799 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
1803 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
1804 emitCommonOMPParallelDirective(*this, S
, OMPD_parallel
, CodeGen
,
1805 emitEmptyBoundParameters
);
1806 emitPostUpdateForReductionClause(*this, S
,
1807 [](CodeGenFunction
&) { return nullptr; });
1809 // Check for outer lastprivate conditional update.
1810 checkForLastprivateConditionalUpdate(*this, S
);
/// Emit code for the OMPMetaDirective \p S by emitting the statement returned
/// by its getIfStmt().
1813 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective
&S
) {
1814 EmitStmt(S
.getIfStmt());
1818 /// RAII to handle scopes for loop transformation directives.
///
/// The three members are left null unless the statement handed to the
/// constructor is an OMPLoopBasedDirective.
1819 class OMPTransformDirectiveScopeRAII
{
1820 OMPLoopScope
*Scope
= nullptr;
1821 CodeGenFunction::CGCapturedStmtInfo
*CGSI
= nullptr;
1822 CodeGenFunction::CGCapturedStmtRAII
*CapInfoRAII
= nullptr;
/// If \p S is an OMPLoopBasedDirective, heap-allocate an OMPLoopScope for it,
/// a CGCapturedStmtInfo of kind CR_OpenMP, and a CGCapturedStmtRAII that
/// installs that info on \p CGF.
1825 OMPTransformDirectiveScopeRAII(CodeGenFunction
&CGF
, const Stmt
*S
) {
1826 if (const auto *Dir
= dyn_cast
<OMPLoopBasedDirective
>(S
)) {
1827 Scope
= new OMPLoopScope(CGF
, *Dir
);
1828 CGSI
= new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP
);
1829 CapInfoRAII
= new CodeGenFunction::CGCapturedStmtRAII(CGF
, CGSI
);
// NOTE(review): presumably releases the objects allocated by the constructor;
// the destructor body is not visible here -- confirm.
1832 ~OMPTransformDirectiveScopeRAII() {
/// Recursively emit the statement \p S that forms (part of) a loop nest body.
///
/// \param CGF      Function being emitted.
/// \param S        Statement to emit; containers are ignored first.
/// \param NextLoop The next inner loop statement expected within \p S.
/// \param MaxLevel Upper bound on \p Level; asserted on entry.
/// \param Level    Current nesting level, 0 by default.
///
/// A CompoundStmt is emitted child by child inside a LexicalScope, at the same
/// \p Level. When the simplified statement is \p NextLoop, a loop
/// transformation directive is replaced by its transformed statement and an
/// OMPCanonicalLoop by its loop statement. For a range-based for loop, the
/// loop variable statement is emitted and processing continues with the loop
/// body. While Level + 1 < MaxLevel, the next inner loop is looked up and the
/// function recurses with Level + 1.
1842 static void emitBody(CodeGenFunction
&CGF
, const Stmt
*S
, const Stmt
*NextLoop
,
1843 int MaxLevel
, int Level
= 0) {
1844 assert(Level
< MaxLevel
&& "Too deep lookup during loop body codegen.");
1845 const Stmt
*SimplifiedS
= S
->IgnoreContainers();
1846 if (const auto *CS
= dyn_cast
<CompoundStmt
>(SimplifiedS
)) {
1847 PrettyStackTraceLoc
CrashInfo(
1848 CGF
.getContext().getSourceManager(), CS
->getLBracLoc(),
1849 "LLVM IR generation of compound statement ('{}')");
1851 // Keep track of the current cleanup stack depth, including debug scopes.
1852 CodeGenFunction::LexicalScope
Scope(CGF
, S
->getSourceRange());
1853 for (const Stmt
*CurStmt
: CS
->body())
1854 emitBody(CGF
, CurStmt
, NextLoop
, MaxLevel
, Level
);
// Reached the expected inner loop: look through wrapper nodes to the
// underlying loop statement.
1857 if (SimplifiedS
== NextLoop
) {
1858 if (auto *Dir
= dyn_cast
<OMPLoopTransformationDirective
>(SimplifiedS
))
1859 SimplifiedS
= Dir
->getTransformedStmt();
1860 if (const auto *CanonLoop
= dyn_cast
<OMPCanonicalLoop
>(SimplifiedS
))
1861 SimplifiedS
= CanonLoop
->getLoopStmt();
1862 if (const auto *For
= dyn_cast
<ForStmt
>(SimplifiedS
)) {
1865 assert(isa
<CXXForRangeStmt
>(SimplifiedS
) &&
1866 "Expected canonical for loop or range-based for loop.");
1867 const auto *CXXFor
= cast
<CXXForRangeStmt
>(SimplifiedS
);
1868 CGF
.EmitStmt(CXXFor
->getLoopVarStmt());
1869 S
= CXXFor
->getBody();
// Descend one level further while the nest depth limit allows it.
1871 if (Level
+ 1 < MaxLevel
) {
1872 NextLoop
= OMPLoopDirective::tryToFindNextInnerLoop(
1873 S
, /*TryImperfectlyNestedLoops=*/true);
1874 emitBody(CGF
, S
, NextLoop
, MaxLevel
, Level
+ 1);
/// Emit the body of one iteration of the loop directive \p D.
///
/// Within a RunCleanupsScope this emits the counter update expressions and the
/// updates of linear-clause variables (skipped for distribute directives). It
/// then registers an "omp.body.continue" destination together with
/// \p LoopExit on the BreakContinueStack, sets up the inscan reduction blocks
/// when the inscan scope privatizes anything, and emits the loop nest body
/// through emitBody(). Finally the continue block is emitted and the
/// BreakContinueStack entry is popped.
1881 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective
&D
,
1882 JumpDest LoopExit
) {
1883 RunCleanupsScope
BodyScope(*this);
1884 // Update counters values on current iteration.
1885 for (const Expr
*UE
: D
.updates())
1886 EmitIgnoredExpr(UE
);
1887 // Update the linear variables.
1888 // In distribute directives only loop counters may be marked as linear, no
1889 // need to generate the code for them.
1890 if (!isOpenMPDistributeDirective(D
.getDirectiveKind())) {
1891 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
1892 for (const Expr
*UE
: C
->updates())
1893 EmitIgnoredExpr(UE
);
1897 // On a continue in the body, jump to the end.
1898 JumpDest Continue
= getJumpDestInCurrentScope("omp.body.continue");
1899 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
1900 for (const Expr
*E
: D
.finals_conditions()) {
1903 // Check that loop counter in non-rectangular nest fits into the iteration
1905 llvm::BasicBlock
*NextBB
= createBasicBlock("omp.body.next");
1906 EmitBranchOnBoolExpr(E
, NextBB
, Continue
.getBlock(),
1907 getProfileCount(D
.getBody()));
// Privatize() reports whether any inscan reduction was registered.
1911 OMPPrivateScope
InscanScope(*this);
1912 EmitOMPReductionClauseInit(D
, InscanScope
, /*ForInscan=*/true);
1913 bool IsInscanRegion
= InscanScope
.Privatize();
1914 if (IsInscanRegion
) {
1915 // Need to remember the block before and after scan directive
1916 // to dispatch them correctly depending on the clause used in
1917 // this directive, inclusive or exclusive. For inclusive scan the natural
1918 // order of the blocks is used, for exclusive clause the blocks must be
1919 // executed in reverse order.
1920 OMPBeforeScanBlock
= createBasicBlock("omp.before.scan.bb");
1921 OMPAfterScanBlock
= createBasicBlock("omp.after.scan.bb");
1922 // No need to allocate inscan exit block, in simd mode it is selected in the
1923 // codegen for the scan directive.
1924 if (D
.getDirectiveKind() != OMPD_simd
&& !getLangOpts().OpenMPSimd
)
1925 OMPScanExitBlock
= createBasicBlock("omp.exit.inscan.bb");
1926 OMPScanDispatch
= createBasicBlock("omp.inscan.dispatch");
1927 EmitBranch(OMPScanDispatch
);
1928 EmitBlock(OMPBeforeScanBlock
);
1931 // Emit loop variables for C++ range loops.
1933 D
.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1935 emitBody(*this, Body
,
1936 OMPLoopBasedDirective::tryToFindNextInnerLoop(
1937 Body
, /*TryImperfectlyNestedLoops=*/true),
1938 D
.getLoopsNumber());
1940 // Jump to the dispatcher at the end of the loop body.
1942 EmitBranch(OMPScanExitBlock
);
1944 // The end (updates/cleanups).
1945 EmitBlock(Continue
.getBlock());
1946 BreakContinueStack
.pop_back();
/// Pair of an emitted function and a value; emitCapturedStmtFunc fills it with
/// the generated function and the pointer to its captured struct.
1949 using EmittedClosureTy
= std::pair
<llvm::Function
*, llvm::Value
*>;
1951 /// Emit a captured statement and return the function as well as its captured
1952 /// closure context.
///
/// The captured struct is initialized in \p ParentCGF, while the function for
/// \p S is generated by a separate CodeGenFunction created on the parent's
/// module with a fresh CGCapturedStmtInfo installed.
///
/// \returns the generated function paired with the pointer to the captured
/// struct as seen from \p ParentCGF.
1953 static EmittedClosureTy
emitCapturedStmtFunc(CodeGenFunction
&ParentCGF
,
1954 const CapturedStmt
*S
) {
1955 LValue CapStruct
= ParentCGF
.InitCapturedStruct(*S
);
1956 CodeGenFunction
CGF(ParentCGF
.CGM
, /*suppressNewContext=*/true);
1957 std::unique_ptr
<CodeGenFunction::CGCapturedStmtInfo
> CSI
=
1958 std::make_unique
<CodeGenFunction::CGCapturedStmtInfo
>(*S
);
1959 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, CSI
.get());
1960 llvm::Function
*F
= CGF
.GenerateCapturedStmtFunction(*S
);
1962 return {F
, CapStruct
.getPointer(ParentCGF
)};
1965 /// Emit a call to a previously captured closure.
///
/// \param ParentCGF Function in which the call is emitted.
/// \param Cap       Closure to call: the function and its context value.
/// \param Args      Explicit arguments; the closure context is appended after
///                  them as the final argument.
/// \returns the created call instruction.
1966 static llvm::CallInst
*
1967 emitCapturedStmtCall(CodeGenFunction
&ParentCGF
, EmittedClosureTy Cap
,
1968 llvm::ArrayRef
<llvm::Value
*> Args
) {
1969 // Append the closure context to the argument.
1970 SmallVector
<llvm::Value
*> EffectiveArgs
;
1971 EffectiveArgs
.reserve(Args
.size() + 1);
1972 llvm::append_range(EffectiveArgs
, Args
);
1973 EffectiveArgs
.push_back(Cap
.second
);
1975 return ParentCGF
.Builder
.CreateCall(Cap
.first
, EffectiveArgs
);
/// Obtain the CanonicalLoopInfo for the loop nest of \p Depth loops in \p S.
///
/// Only Depth == 1 is supported (asserted). ExpectedOMPLoopDepth is set to
/// \p Depth for the duration of the call and restored afterwards. The last
/// entry of OMPLoopNestStack is taken as the result and \p Depth entries are
/// popped from the stack.
1978 llvm::CanonicalLoopInfo
*
1979 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt
*S
, int Depth
) {
1980 assert(Depth
== 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1982 // The caller is processing the loop-associated directive processing the \p
1983 // Depth loops nested in \p S. Put the previous pending loop-associated
1984 // directive to the stack. If the current loop-associated directive is a loop
1985 // transformation directive, it will push its generated loops onto the stack
1986 // such that together with the loops left here they form the combined loop
1987 // nest for the parent loop-associated directive.
1988 int ParentExpectedOMPLoopDepth
= ExpectedOMPLoopDepth
;
1989 ExpectedOMPLoopDepth
= Depth
;
1992 assert(OMPLoopNestStack
.size() >= (size_t)Depth
&& "Found too few loops");
1994 // The last added loop is the outermost one.
1995 llvm::CanonicalLoopInfo
*Result
= OMPLoopNestStack
.back();
1997 // Pop the \p Depth loops requested by the call from that stack and restore
1998 // the previous context.
1999 OMPLoopNestStack
.pop_back_n(Depth
);
2000 ExpectedOMPLoopDepth
= ParentExpectedOMPLoopDepth
;
/// Emit the OMPCanonicalLoop \p S.
///
/// Without LangOpts.OpenMPIRBuilder the wrapped loop statement is emitted
/// directly. Otherwise the init statements of the for / range-based for loop
/// are emitted and the distance and loop-variable functions are emitted as
/// closures. The distance closure is called to obtain the iteration count, and
/// the loop skeleton is created with OpenMPIRBuilder::createCanonicalLoop. The
/// body callback calls the loop-variable closure with the address of the user
/// loop variable and the logical induction variable. The resulting
/// CanonicalLoopInfo is pushed onto OMPLoopNestStack.
2005 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop
*S
) {
2006 const Stmt
*SyntacticalLoop
= S
->getLoopStmt();
2007 if (!getLangOpts().OpenMPIRBuilder
) {
2008 // Ignore if OpenMPIRBuilder is not enabled.
2009 EmitStmt(SyntacticalLoop
);
2013 LexicalScope
ForScope(*this, S
->getSourceRange());
2015 // Emit init statements. The Distance/LoopVar funcs may reference variable
2016 // declarations they contain.
2017 const Stmt
*BodyStmt
;
2018 if (const auto *For
= dyn_cast
<ForStmt
>(SyntacticalLoop
)) {
2019 if (const Stmt
*InitStmt
= For
->getInit())
2021 BodyStmt
= For
->getBody();
2022 } else if (const auto *RangeFor
=
2023 dyn_cast
<CXXForRangeStmt
>(SyntacticalLoop
)) {
2024 if (const DeclStmt
*RangeStmt
= RangeFor
->getRangeStmt())
2025 EmitStmt(RangeStmt
);
2026 if (const DeclStmt
*BeginStmt
= RangeFor
->getBeginStmt())
2027 EmitStmt(BeginStmt
);
2028 if (const DeclStmt
*EndStmt
= RangeFor
->getEndStmt())
2030 if (const DeclStmt
*LoopVarStmt
= RangeFor
->getLoopVarStmt())
2031 EmitStmt(LoopVarStmt
);
2032 BodyStmt
= RangeFor
->getBody();
2034 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2036 // Emit closure for later use. By-value captures will be captured here.
2037 const CapturedStmt
*DistanceFunc
= S
->getDistanceFunc();
2038 EmittedClosureTy DistanceClosure
= emitCapturedStmtFunc(*this, DistanceFunc
);
2039 const CapturedStmt
*LoopVarFunc
= S
->getLoopVarFunc();
2040 EmittedClosureTy LoopVarClosure
= emitCapturedStmtFunc(*this, LoopVarFunc
);
2042 // Call the distance function to get the number of iterations of the loop to
2044 QualType LogicalTy
= DistanceFunc
->getCapturedDecl()
2047 .getNonReferenceType();
// The distance closure writes the count through the pointer passed to it; the
// value is loaded back from the temporary afterwards.
2048 Address CountAddr
= CreateMemTemp(LogicalTy
, ".count.addr");
2049 emitCapturedStmtCall(*this, DistanceClosure
, {CountAddr
.getPointer()});
2050 llvm::Value
*DistVal
= Builder
.CreateLoad(CountAddr
, ".count");
2052 // Emit the loop structure.
2053 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
2054 auto BodyGen
= [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP
,
2055 llvm::Value
*IndVar
) {
2056 Builder
.restoreIP(CodeGenIP
);
2058 // Emit the loop body: Convert the logical iteration number to the loop
2059 // variable and emit the body.
2060 const DeclRefExpr
*LoopVarRef
= S
->getLoopVarRef();
2061 LValue LCVal
= EmitLValue(LoopVarRef
);
2062 Address LoopVarAddress
= LCVal
.getAddress(*this);
2063 emitCapturedStmtCall(*this, LoopVarClosure
,
2064 {LoopVarAddress
.getPointer(), IndVar
});
2066 RunCleanupsScope
BodyScope(*this);
2069 llvm::CanonicalLoopInfo
*CL
=
2070 OMPBuilder
.createCanonicalLoop(Builder
, BodyGen
, DistVal
);
2072 // Finish up the loop.
2073 Builder
.restoreIP(CL
->getAfterIP());
2074 ForScope
.ForceCleanup();
2076 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2077 OMPLoopNestStack
.push_back(CL
);
/// Emit the inner loop of the directive \p S.
///
/// \param S               Directive the loop belongs to; its innermost
///                        captured statement is inspected for loop attributes.
/// \param RequiresCleanup If true, the loop exit is staged through a separate
///                        "omp.inner.for.cond.cleanup" block that branches
///                        through cleanups to the loop exit.
/// \param LoopCond        Condition tested in the "omp.inner.for.cond" block.
/// \param IncExpr         Increment expression emitted in the
///                        "omp.inner.for.inc" block.
/// \param BodyGen         Callback for the loop body.
/// \param PostIncGen      Callback related to the increment.
///
/// The emitted structure is: condition block, optional cleanup block, body
/// block, increment block with a back-edge to the condition block, and the
/// "omp.inner.for.end" exit block. OMPLoopNestStack is cleared before the loop
/// is pushed on LoopStack.
2080 void CodeGenFunction::EmitOMPInnerLoop(
2081 const OMPExecutableDirective
&S
, bool RequiresCleanup
, const Expr
*LoopCond
,
2082 const Expr
*IncExpr
,
2083 const llvm::function_ref
<void(CodeGenFunction
&)> BodyGen
,
2084 const llvm::function_ref
<void(CodeGenFunction
&)> PostIncGen
) {
2085 auto LoopExit
= getJumpDestInCurrentScope("omp.inner.for.end");
2087 // Start the loop with a block that tests the condition.
2088 auto CondBlock
= createBasicBlock("omp.inner.for.cond");
2089 EmitBlock(CondBlock
);
2090 const SourceRange R
= S
.getSourceRange();
2092 // If attributes are attached, push to the basic block with them.
2093 const auto &OMPED
= cast
<OMPExecutableDirective
>(S
);
2094 const CapturedStmt
*ICS
= OMPED
.getInnermostCapturedStmt();
2095 const Stmt
*SS
= ICS
->getCapturedStmt();
2096 const AttributedStmt
*AS
= dyn_cast_or_null
<AttributedStmt
>(SS
);
2097 OMPLoopNestStack
.clear();
2099 LoopStack
.push(CondBlock
, CGM
.getContext(), CGM
.getCodeGenOpts(),
2100 AS
->getAttrs(), SourceLocToDebugLoc(R
.getBegin()),
2101 SourceLocToDebugLoc(R
.getEnd()));
2103 LoopStack
.push(CondBlock
, SourceLocToDebugLoc(R
.getBegin()),
2104 SourceLocToDebugLoc(R
.getEnd()));
2106 // If there are any cleanups between here and the loop-exit scope,
2107 // create a block to stage a loop exit along.
2108 llvm::BasicBlock
*ExitBlock
= LoopExit
.getBlock();
2109 if (RequiresCleanup
)
2110 ExitBlock
= createBasicBlock("omp.inner.for.cond.cleanup");
2112 llvm::BasicBlock
*LoopBody
= createBasicBlock("omp.inner.for.body");
2115 EmitBranchOnBoolExpr(LoopCond
, LoopBody
, ExitBlock
, getProfileCount(&S
));
// A distinct exit block only exists when cleanups were requested above.
2116 if (ExitBlock
!= LoopExit
.getBlock()) {
2117 EmitBlock(ExitBlock
);
2118 EmitBranchThroughCleanup(LoopExit
);
2121 EmitBlock(LoopBody
);
2122 incrementProfileCounter(&S
);
2124 // Create a block for the increment.
2125 JumpDest Continue
= getJumpDestInCurrentScope("omp.inner.for.inc");
2126 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
2130 // Emit "IV = IV + 1" and a back-edge to the condition block.
2131 EmitBlock(Continue
.getBlock());
2132 EmitIgnoredExpr(IncExpr
);
2134 BreakContinueStack
.pop_back();
2135 EmitBranch(CondBlock
);
2137 // Emit the fall-through block.
2138 EmitBlock(LoopExit
.getBlock());
/// Emit the initialization of the variables used by the linear clauses of
/// \p D.
///
/// For every init expression of every linear clause, the referenced VarDecl is
/// taken. If its initializer (ignoring implicit casts) is a DeclRefExpr, the
/// variable is allocated with EmitAutoVarAlloca and its cleanups are
/// registered, using a DeclRefExpr built for the original variable. When the
/// clause has a calc-step expression, the variable on its left-hand side is
/// emitted and the step calculation itself is emitted.
///
/// The function checks HaveInsertPoint() up front and tracks HasLinears.
/// NOTE(review): presumably returns HasLinears -- the return statements are
/// not visible here, confirm.
2141 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective
&D
) {
2142 if (!HaveInsertPoint())
2144 // Emit inits for the linear variables.
2145 bool HasLinears
= false;
2146 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2147 for (const Expr
*Init
: C
->inits()) {
2149 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(Init
)->getDecl());
2150 if (const auto *Ref
=
2151 dyn_cast
<DeclRefExpr
>(VD
->getInit()->IgnoreImpCasts())) {
2152 AutoVarEmission Emission
= EmitAutoVarAlloca(*VD
);
2153 const auto *OrigVD
= cast
<VarDecl
>(Ref
->getDecl());
// The reference is marked as referring to a capture when the current
// CapturedStmtInfo knows the original variable.
2154 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
2155 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
2156 VD
->getInit()->getType(), VK_LValue
,
2157 VD
->getInit()->getExprLoc());
2160 MakeAddrLValue(Emission
.getAllocatedAddress(), VD
->getType()),
2161 /*capturedByInit=*/false);
2162 EmitAutoVarCleanups(Emission
);
2167 // Emit the linear steps for the linear clauses.
2168 // If a step is not constant, it is pre-calculated before the loop.
2169 if (const auto *CS
= cast_or_null
<BinaryOperator
>(C
->getCalcStep()))
2170 if (const auto *SaveRef
= cast
<DeclRefExpr
>(CS
->getLHS())) {
2171 EmitVarDecl(*cast
<VarDecl
>(SaveRef
->getDecl()));
2172 // Emit calculation of the linear step.
2173 EmitIgnoredExpr(CS
);
/// Emit the final values of the variables named in the linear clauses of
/// \p D.
///
/// \param D       Loop directive whose linear clauses are processed.
/// \param CondGen Callback producing an optional condition value. When it
///                returns non-null, the updates are guarded by a conditional
///                branch into ".omp.linear.pu" with ".omp.linear.pu.done" as
///                the join block.
///
/// For each final expression, the matching original variable is taken from the
/// clause's variable list and mapped to its own address in a private scope.
/// The clause's post-update expression, if any, is emitted afterwards.
2179 void CodeGenFunction::EmitOMPLinearClauseFinal(
2180 const OMPLoopDirective
&D
,
2181 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
2182 if (!HaveInsertPoint())
2184 llvm::BasicBlock
*DoneBB
= nullptr;
2185 // Emit the final values of the linear variables.
2186 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2187 auto IC
= C
->varlist_begin();
2188 for (const Expr
*F
: C
->finals()) {
2190 if (llvm::Value
*Cond
= CondGen(*this)) {
2191 // If the first post-update expression is found, emit conditional
2192 // block if it was requested.
2193 llvm::BasicBlock
*ThenBB
= createBasicBlock(".omp.linear.pu");
2194 DoneBB
= createBasicBlock(".omp.linear.pu.done");
2195 Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
2199 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IC
)->getDecl());
2200 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
2201 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
2202 (*IC
)->getType(), VK_LValue
, (*IC
)->getExprLoc());
2203 Address OrigAddr
= EmitLValue(&DRE
).getAddress(*this);
// Map the original variable to its own address inside a private scope.
2204 CodeGenFunction::OMPPrivateScope
VarScope(*this);
2205 VarScope
.addPrivate(OrigVD
, OrigAddr
);
2206 (void)VarScope
.Privatize();
2210 if (const Expr
*PostUpdate
= C
->getPostUpdateExpr())
2211 EmitIgnoredExpr(PostUpdate
);
2214 EmitBlock(DoneBB
, /*IsFinished=*/true);
/// Emit alignment assumptions for the variables listed in the aligned clauses
/// of \p D.
///
/// For each clause, the alignment comes from the clause's alignment expression
/// when present; it is cast to a ConstantInt. A value of 0 means no explicit
/// alignment, in which case a default derived from
/// ASTContext::getOpenMPDefaultSimdAlign of the pointee type is used. The
/// alignment must be 0 or a power of two (asserted); a non-zero alignment is
/// turned into an alignment assumption on the pointer value.
2217 static void emitAlignedClause(CodeGenFunction
&CGF
,
2218 const OMPExecutableDirective
&D
) {
2219 if (!CGF
.HaveInsertPoint())
2221 for (const auto *Clause
: D
.getClausesOfKind
<OMPAlignedClause
>()) {
2222 llvm::APInt
ClauseAlignment(64, 0);
2223 if (const Expr
*AlignmentExpr
= Clause
->getAlignment()) {
2225 cast
<llvm::ConstantInt
>(CGF
.EmitScalarExpr(AlignmentExpr
));
2226 ClauseAlignment
= AlignmentCI
->getValue();
2228 for (const Expr
*E
: Clause
->varlists()) {
2229 llvm::APInt
Alignment(ClauseAlignment
);
2230 if (Alignment
== 0) {
2231 // OpenMP [2.8.1, Description]
2232 // If no optional parameter is specified, implementation-defined default
2233 // alignments for SIMD instructions on the target platforms are assumed.
2236 .toCharUnitsFromBits(CGF
.getContext().getOpenMPDefaultSimdAlign(
2237 E
->getType()->getPointeeType()))
2240 assert((Alignment
== 0 || Alignment
.isPowerOf2()) &&
2241 "alignment is not power of 2");
2242 if (Alignment
!= 0) {
2243 llvm::Value
*PtrValue
= CGF
.EmitScalarExpr(E
);
2244 CGF
.emitAlignmentAssumption(
2245 PtrValue
, E
, /*No second loc needed*/ SourceLocation(),
2246 llvm::ConstantInt::get(CGF
.getLLVMContext(), Alignment
));
/// Register private copies of the loop counters of \p S in \p LoopScope.
///
/// Each counter is paired with its private counter declaration. The private
/// variable is allocated without initialization, its cleanups are registered,
/// and the original counter is mapped to the newly allocated address. The
/// private declaration itself is mapped either to the address of the original
/// counter (when that counter is in LocalDeclMap, captured, or has global
/// storage) or to the newly allocated address.
///
/// In addition, for ordered clauses that specify a loop count, the extra loop
/// counters that refer to an enclosing variable or capture are mapped to fresh
/// temporaries.
2252 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2253 const OMPLoopDirective
&S
, CodeGenFunction::OMPPrivateScope
&LoopScope
) {
2254 if (!HaveInsertPoint())
2256 auto I
= S
.private_counters().begin();
2257 for (const Expr
*E
: S
.counters()) {
2258 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2259 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*I
)->getDecl());
2260 // Emit var without initialization.
2261 AutoVarEmission VarEmission
= EmitAutoVarAlloca(*PrivateVD
);
2262 EmitAutoVarCleanups(VarEmission
);
// Drop the LocalDeclMap entry for the private declaration; its mapping is
// registered through LoopScope below instead.
2263 LocalDeclMap
.erase(PrivateVD
);
2264 (void)LoopScope
.addPrivate(VD
, VarEmission
.getAllocatedAddress());
2265 if (LocalDeclMap
.count(VD
) || CapturedStmtInfo
->lookup(VD
) ||
2266 VD
->hasGlobalStorage()) {
2267 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(VD
),
2268 LocalDeclMap
.count(VD
) || CapturedStmtInfo
->lookup(VD
),
2269 E
->getType(), VK_LValue
, E
->getExprLoc());
2270 (void)LoopScope
.addPrivate(PrivateVD
, EmitLValue(&DRE
).getAddress(*this));
2272 (void)LoopScope
.addPrivate(PrivateVD
, VarEmission
.getAllocatedAddress());
2276 // Privatize extra loop counters used in loops for ordered(n) clauses.
2277 for (const auto *C
: S
.getClausesOfKind
<OMPOrderedClause
>()) {
2278 if (!C
->getNumForLoops())
2280 for (unsigned I
= S
.getLoopsNumber(), E
= C
->getLoopNumIterations().size();
2282 const auto *DRE
= cast
<DeclRefExpr
>(C
->getLoopCounter(I
));
2283 const auto *VD
= cast
<VarDecl
>(DRE
->getDecl());
2284 // Override only those variables that can be captured to avoid re-emission
2285 // of the variables declared within the loops.
2286 if (DRE
->refersToEnclosingVariableOrCapture()) {
2287 (void)LoopScope
.addPrivate(
2288 VD
, CreateMemTemp(DRE
->getType(), VD
->getName()));
/// Emit the precondition check of the loop directive \p S.
///
/// \param CGF        Function being emitted.
/// \param S          Loop directive whose counters and inits are used.
/// \param Cond       Precondition expression to branch on.
/// \param TrueBlock  Destination when \p Cond holds.
/// \param FalseBlock Destination when \p Cond does not hold.
/// \param TrueCount  Profile count forwarded to EmitBranchOnBoolExpr.
///
/// The loop counters are privatized in a local scope and the init expressions
/// are emitted. Temporaries are created for the dependent counters and their
/// init expressions are emitted before the branch. The temporary mappings are
/// restored after the branch.
2294 static void emitPreCond(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2295 const Expr
*Cond
, llvm::BasicBlock
*TrueBlock
,
2296 llvm::BasicBlock
*FalseBlock
, uint64_t TrueCount
) {
2297 if (!CGF
.HaveInsertPoint())
2300 CodeGenFunction::OMPPrivateScope
PreCondScope(CGF
);
2301 CGF
.EmitOMPPrivateLoopCounters(S
, PreCondScope
);
2302 (void)PreCondScope
.Privatize();
2303 // Get initial values of real counters.
2304 for (const Expr
*I
: S
.inits()) {
2305 CGF
.EmitIgnoredExpr(I
);
2308 // Create temp loop control variables with their init values to support
2309 // non-rectangular loops.
2310 CodeGenFunction::OMPMapVars PreCondVars
;
2311 for (const Expr
*E
: S
.dependent_counters()) {
2314 assert(!E
->getType().getNonReferenceType()->isRecordType() &&
2315 "dependent counter must not be an iterator.");
2316 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2317 Address CounterAddr
=
2318 CGF
.CreateMemTemp(VD
->getType().getNonReferenceType());
2319 (void)PreCondVars
.setVarAddr(CGF
, VD
, CounterAddr
);
2321 (void)PreCondVars
.apply(CGF
);
2322 for (const Expr
*E
: S
.dependent_inits()) {
2325 CGF
.EmitIgnoredExpr(E
);
2327 // Check that loop is executed at least one time.
2328 CGF
.EmitBranchOnBoolExpr(Cond
, TrueBlock
, FalseBlock
, TrueCount
);
2329 PreCondVars
.restore(CGF
);
/// Emit the private copies for the variables of the linear clauses of \p D.
///
/// For simd directives, the canonical declarations of the loop counters are
/// collected first. Each linear variable is paired with its private
/// declaration. If the variable is not one of those simd loop counters, the
/// private declaration is emitted and the original variable is mapped to it in
/// \p PrivateScope. Otherwise only the private declaration is emitted.
2332 void CodeGenFunction::EmitOMPLinearClause(
2333 const OMPLoopDirective
&D
, CodeGenFunction::OMPPrivateScope
&PrivateScope
) {
2334 if (!HaveInsertPoint())
2336 llvm::DenseSet
<const VarDecl
*> SIMDLCVs
;
2337 if (isOpenMPSimdDirective(D
.getDirectiveKind())) {
2338 const auto *LoopDirective
= cast
<OMPLoopDirective
>(&D
);
2339 for (const Expr
*C
: LoopDirective
->counters()) {
2341 cast
<VarDecl
>(cast
<DeclRefExpr
>(C
)->getDecl())->getCanonicalDecl());
2344 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2345 auto CurPrivate
= C
->privates().begin();
2346 for (const Expr
*E
: C
->varlists()) {
2347 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2348 const auto *PrivateVD
=
2349 cast
<VarDecl
>(cast
<DeclRefExpr
>(*CurPrivate
)->getDecl());
2350 if (!SIMDLCVs
.count(VD
->getCanonicalDecl())) {
2351 // Emit private VarDecl with copy init.
2352 EmitVarDecl(*PrivateVD
);
2354 PrivateScope
.addPrivate(VD
, GetAddrOfLocalVar(PrivateVD
));
2355 assert(IsRegistered
&& "linear var already registered as private");
2356 // Silence the warning about unused variable.
2359 EmitVarDecl(*PrivateVD
);
/// Apply the simdlen / safelen clause of \p D to the current loop on
/// CGF.LoopStack.
///
/// A simdlen clause takes precedence: its constant value becomes the vectorize
/// width, and the loop is marked parallel only when no safelen clause is
/// present. Without simdlen, a safelen clause sets the vectorize width and
/// disables the parallel marking. Both clause values are cast to ConstantInt.
2366 static void emitSimdlenSafelenClause(CodeGenFunction
&CGF
,
2367 const OMPExecutableDirective
&D
) {
2368 if (!CGF
.HaveInsertPoint())
2370 if (const auto *C
= D
.getSingleClause
<OMPSimdlenClause
>()) {
2371 RValue Len
= CGF
.EmitAnyExpr(C
->getSimdlen(), AggValueSlot::ignored(),
2372 /*ignoreResult=*/true);
2373 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2374 CGF
.LoopStack
.setVectorizeWidth(Val
->getZExtValue());
2375 // In presence of finite 'safelen', it may be unsafe to mark all
2376 // the memory instructions parallel, because loop-carried
2377 // dependences of 'safelen' iterations are possible.
2378 CGF
.LoopStack
.setParallel(!D
.getSingleClause
<OMPSafelenClause
>());
2379 } else if (const auto *C
= D
.getSingleClause
<OMPSafelenClause
>()) {
2380 RValue Len
= CGF
.EmitAnyExpr(C
->getSafelen(), AggValueSlot::ignored(),
2381 /*ignoreResult=*/true);
2382 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2383 CGF
.LoopStack
.setVectorizeWidth(Val
->getZExtValue());
2384 // In presence of finite 'safelen', it may be unsafe to mark all
2385 // the memory instructions parallel, because loop-carried
2386 // dependences of 'safelen' iterations are possible.
2387 CGF
.LoopStack
.setParallel(/*Enable=*/false);
/// Configure LoopStack for the simd loop of directive \p D.
///
/// The loop starts out marked parallel with vectorization enabled, after which
/// the simdlen/safelen clauses are applied. An order(concurrent) clause marks
/// the loop parallel again. The parallel marking is disabled when the
/// directive is OMPD_simd (or a simd directive under LangOpts.OpenMPSimd) and
/// has a reduction clause with the inscan modifier.
2391 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
&D
) {
2392 // Walk clauses and process safelen/lastprivate.
2393 LoopStack
.setParallel(/*Enable=*/true);
2394 LoopStack
.setVectorizeEnable();
2395 emitSimdlenSafelenClause(*this, D
);
2396 if (const auto *C
= D
.getSingleClause
<OMPOrderClause
>())
2397 if (C
->getKind() == OMPC_ORDER_concurrent
)
2398 LoopStack
.setParallel(/*Enable=*/true);
2399 if ((D
.getDirectiveKind() == OMPD_simd
||
2400 (getLangOpts().OpenMPSimd
&&
2401 isOpenMPSimdDirective(D
.getDirectiveKind()))) &&
2402 llvm::any_of(D
.getClausesOfKind
<OMPReductionClause
>(),
2403 [](const OMPReductionClause
*C
) {
2404 return C
->getModifier() == OMPC_REDUCTION_inscan
;
2406 // Disable parallel access in case of prefix sum.
2407 LoopStack
.setParallel(/*Enable=*/false);
/// Emit the final updates of the loop counters of the simd directive \p D.
///
/// \param D       Loop directive whose counters / private counters / finals
///                are walked together.
/// \param CondGen Callback producing an optional condition value. When it
///                returns non-null, the updates are guarded by a conditional
///                branch into ".omp.final.then" with ".omp.final.done" as the
///                join block.
///
/// A counter is handled only if it is in LocalDeclMap, is captured, has global
/// storage, or is an OMPCapturedExprDecl. The address the original variable is
/// mapped to comes either from the captured expression's initializer or from a
/// reference to the private counter declaration.
2410 void CodeGenFunction::EmitOMPSimdFinal(
2411 const OMPLoopDirective
&D
,
2412 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
2413 if (!HaveInsertPoint())
2415 llvm::BasicBlock
*DoneBB
= nullptr;
2416 auto IC
= D
.counters().begin();
2417 auto IPC
= D
.private_counters().begin();
2418 for (const Expr
*F
: D
.finals()) {
2419 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>((*IC
))->getDecl());
2420 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>((*IPC
))->getDecl());
2421 const auto *CED
= dyn_cast
<OMPCapturedExprDecl
>(OrigVD
);
2422 if (LocalDeclMap
.count(OrigVD
) || CapturedStmtInfo
->lookup(OrigVD
) ||
2423 OrigVD
->hasGlobalStorage() || CED
) {
2425 if (llvm::Value
*Cond
= CondGen(*this)) {
2426 // If the first post-update expression is found, emit conditional
2427 // block if it was requested.
2428 llvm::BasicBlock
*ThenBB
= createBasicBlock(".omp.final.then");
2429 DoneBB
= createBasicBlock(".omp.final.done");
2430 Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
2434 Address OrigAddr
= Address::invalid();
2437 EmitLValue(CED
->getInit()->IgnoreImpCasts()).getAddress(*this);
2439 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(PrivateVD
),
2440 /*RefersToEnclosingVariableOrCapture=*/false,
2441 (*IPC
)->getType(), VK_LValue
, (*IPC
)->getExprLoc());
2442 OrigAddr
= EmitLValue(&DRE
).getAddress(*this);
// Map the original counter to the chosen address inside a private scope.
2444 OMPPrivateScope
VarScope(*this);
2445 VarScope
.addPrivate(OrigVD
, OrigAddr
);
2446 (void)VarScope
.Privatize();
2453 EmitBlock(DoneBB
, /*IsFinished=*/true);
/// Emit the loop body of \p S via CodeGenFunction::EmitOMPLoopBody, then emit
/// a stop point for the directive.
///
/// \param LoopExit Jump destination forwarded to EmitOMPLoopBody.
2456 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction
&CGF
,
2457 const OMPLoopDirective
&S
,
2458 CodeGenFunction::JumpDest LoopExit
) {
2459 CGF
.EmitOMPLoopBody(S
, LoopExit
);
2460 CGF
.EmitStopPoint(&S
);
2463 /// Emit a helper variable and return corresponding lvalue.
///
/// The declaration referenced by \p Helper must be a VarDecl (checked by
/// cast<>). It is emitted with EmitVarDecl before the reference itself is
/// emitted as an lvalue.
2464 static LValue
EmitOMPHelperVar(CodeGenFunction
&CGF
,
2465 const DeclRefExpr
*Helper
) {
2466 auto VDecl
= cast
<VarDecl
>(Helper
->getDecl());
2467 CGF
.EmitVarDecl(*VDecl
);
2468 return CGF
.EmitLValue(Helper
);
/// Emit a simd loop for \p S, honoring an applicable 'if' clause.
///
/// \param CGF         Function being emitted.
/// \param S           Loop directive being emitted.
/// \param SimdInitGen Code generator captured by the "then" variant.
/// \param BodyCodeGen Code generator for the loop body, captured by both
///                    variants.
///
/// The "then" variant runs under a NontemporalDeclsRAII region and a local
/// decl-map scope; the "else" variant disables vectorization on the LoopStack.
/// For simd directives with OpenMP >= 50, an 'if' clause whose name modifier
/// is OMPD_unknown or OMPD_simd supplies the condition, which is passed with
/// both variants to the OpenMP runtime's emitIfClause.
2471 static void emitCommonSimdLoop(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2472 const RegionCodeGenTy
&SimdInitGen
,
2473 const RegionCodeGenTy
&BodyCodeGen
) {
2474 auto &&ThenGen
= [&S
, &SimdInitGen
, &BodyCodeGen
](CodeGenFunction
&CGF
,
2475 PrePostActionTy
&) {
2476 CGOpenMPRuntime::NontemporalDeclsRAII
NontemporalsRegion(CGF
.CGM
, S
);
2477 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
2482 auto &&ElseGen
= [&BodyCodeGen
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2483 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
2484 CGF
.LoopStack
.setVectorizeEnable(/*Enable=*/false);
// Look for an 'if' clause that applies to the simd part of the directive.
2488 const Expr
*IfCond
= nullptr;
2489 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
2490 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
2491 if (CGF
.getLangOpts().OpenMP
>= 50 &&
2492 (C
->getNameModifier() == OMPD_unknown
||
2493 C
->getNameModifier() == OMPD_simd
)) {
2494 IfCond
= C
->getCondition();
2500 CGF
.CGM
.getOpenMPRuntime().emitIfClause(CGF
, IfCond
, ThenGen
, ElseGen
);
2502 RegionCodeGenTy
ThenRCG(ThenGen
);
/// Emit the code for a simd-kind loop directive \p S into \p CGF: helper
/// variables, the precondition check, privatization of the clause variables,
/// the loop itself and the clause finalization code.
/// \param CGF    Function being emitted into.
/// \param S      Loop directive; asserted to be a simd directive.
/// \param Action Pre/post action object supplied by the caller.
/// NOTE(review): the embedded original line numbers skip in this excerpt, so
/// some statements (the use of \p Action, the declaration of CondConstant, the
/// call that receives the two lambdas below, closing braces) are not visible
/// here -- confirm against the full file.
2507 static void emitOMPSimdRegion(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2508 PrePostActionTy
&Action
) {
2510 assert(isOpenMPSimdDirective(S
.getDirectiveKind()) &&
2511 "Expected simd directive");
2512 OMPLoopScope
PreInitScope(CGF
, S
);
2514 // for (IV in 0..LastIteration) BODY;
2515 // <Final counter/linear vars updates>;
// Directives that are also distribute, worksharing or taskloop directives get
// helper variables for their lower and upper bound.
2518 if (isOpenMPDistributeDirective(S
.getDirectiveKind()) ||
2519 isOpenMPWorksharingDirective(S
.getDirectiveKind()) ||
2520 isOpenMPTaskLoopDirective(S
.getDirectiveKind())) {
2521 (void)EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(S
.getLowerBoundVariable()));
2522 (void)EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(S
.getUpperBoundVariable()));
2525 // Emit: if (PreCond) - begin.
2526 // If the condition constant folds and can be elided, avoid emitting the
// ContBlock starts out null and is assigned where the "simd.if.end" block is
// created.
2529 llvm::BasicBlock
*ContBlock
= nullptr;
2530 if (CGF
.ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
2534 llvm::BasicBlock
*ThenBlock
= CGF
.createBasicBlock("simd.if.then");
2535 ContBlock
= CGF
.createBasicBlock("simd.if.end");
2536 emitPreCond(CGF
, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
2537 CGF
.getProfileCount(&S
));
2538 CGF
.EmitBlock(ThenBlock
);
2539 CGF
.incrementProfileCounter(&S
);
2542 // Emit the loop iteration variable.
2543 const Expr
*IVExpr
= S
.getIterationVariable();
2544 const auto *IVDecl
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IVExpr
)->getDecl());
2545 CGF
.EmitVarDecl(*IVDecl
);
2546 CGF
.EmitIgnoredExpr(S
.getInit());
2548 // Emit the iterations count variable.
2549 // If it is not a variable, Sema decided to calculate iterations count on
2550 // each iteration (e.g., it is foldable into a constant).
2551 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
2552 CGF
.EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
2553 // Emit calculation of the iterations count.
2554 CGF
.EmitIgnoredExpr(S
.getCalcLastIteration());
2557 emitAlignedClause(CGF
, S
);
2558 (void)CGF
.EmitOMPLinearClauseInit(S
);
// Register the private copies for loop counters and for the linear, private,
// reduction and lastprivate clauses in LoopScope, then apply them with
// Privatize().
2560 CodeGenFunction::OMPPrivateScope
LoopScope(CGF
);
2561 CGF
.EmitOMPPrivateLoopCounters(S
, LoopScope
);
2562 CGF
.EmitOMPLinearClause(S
, LoopScope
);
2563 CGF
.EmitOMPPrivateClause(S
, LoopScope
);
2564 CGF
.EmitOMPReductionClauseInit(S
, LoopScope
);
2565 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(
2566 CGF
, S
, CGF
.EmitLValue(S
.getIterationVariable()));
2567 bool HasLastprivateClause
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
2568 (void)LoopScope
.Privatize();
2569 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
2570 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
// First lambda: simd initialization. Second lambda: the inner loop over the
// body. NOTE(review): the call these lambdas are passed to is elided here.
2574 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2575 CGF
.EmitOMPSimdInit(S
);
2577 [&S
, &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2578 CGF
.EmitOMPInnerLoop(
2579 S
, LoopScope
.requiresCleanups(), S
.getCond(), S
.getInc(),
2580 [&S
](CodeGenFunction
&CGF
) {
2581 emitOMPLoopBodyWithStopPoint(CGF
, S
,
2582 CodeGenFunction::JumpDest());
2584 [](CodeGenFunction
&) {});
2586 CGF
.EmitOMPSimdFinal(S
, [](CodeGenFunction
&) { return nullptr; });
2587 // Emit final copy of the lastprivate variables at the end of loops.
2588 if (HasLastprivateClause
)
2589 CGF
.EmitOMPLastprivateClauseFinal(S
, /*NoFinals=*/true);
2590 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_simd
);
2591 emitPostUpdateForReductionClause(CGF
, S
,
2592 [](CodeGenFunction
&) { return nullptr; });
2593 LoopScope
.restoreMap();
2594 CGF
.EmitOMPLinearClauseFinal(S
, [](CodeGenFunction
&) { return nullptr; });
2596 // Emit: if (PreCond) - end.
2598 CGF
.EmitBranch(ContBlock
);
2599 CGF
.EmitBlock(ContBlock
, true);
/// Decide whether the simd directive \p S can be emitted through the
/// OpenMPIRBuilder path. The visible checks look at the directive's clauses
/// and search the associated loop body for an ordered directive.
/// NOTE(review): the return statements are elided in this excerpt; presumably
/// an unsupported clause or a nested ordered directive yields false -- confirm
/// against the full file.
2603 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective
&S
) {
2604 // Check for unsupported clauses
2605 for (OMPClause
*C
: S
.clauses()) {
2606 // Currently only order, simdlen, safelen and aligned clauses are supported
2607 if (!(isa
<OMPSimdlenClause
>(C
) || isa
<OMPSafelenClause
>(C
) ||
2608 isa
<OMPOrderClause
>(C
) || isa
<OMPAlignedClause
>(C
)))
2612 // Check if we have a statement with the ordered directive.
2613 // Visit the statement hierarchy to find a compound statement
2614 // with an ordered directive in it.
// Only direct children of the syntactic loop that are compound statements,
// and the direct children of those, are inspected.
2615 if (const auto *CanonLoop
= dyn_cast
<OMPCanonicalLoop
>(S
.getRawStmt())) {
2616 if (const Stmt
*SyntacticalLoop
= CanonLoop
->getLoopStmt()) {
2617 for (const Stmt
*SubStmt
: SyntacticalLoop
->children()) {
2620 if (const CompoundStmt
*CS
= dyn_cast
<CompoundStmt
>(SubStmt
)) {
2621 for (const Stmt
*CSSubStmt
: CS
->children()) {
2624 if (isa
<OMPOrderedDirective
>(CSSubStmt
)) {
/// Build a map from each pointer value named in the aligned clauses of \p S to
/// a 64-bit integer constant holding its alignment.
/// \param S   Simd directive whose aligned clauses are visited.
/// \param CGF Function used to emit the pointer and alignment expressions.
/// NOTE(review): the declaration of AlignmentCI, the assignment of the default
/// alignment and the return statement are elided in this excerpt.
2634 static llvm::MapVector
<llvm::Value
*, llvm::Value
*>
2635 GetAlignedMapping(const OMPSimdDirective
&S
, CodeGenFunction
&CGF
) {
2636 llvm::MapVector
<llvm::Value
*, llvm::Value
*> AlignedVars
;
2637 for (const auto *Clause
: S
.getClausesOfKind
<OMPAlignedClause
>()) {
// Zero means "no alignment given on the clause".
2638 llvm::APInt
ClauseAlignment(64, 0);
2639 if (const Expr
*AlignmentExpr
= Clause
->getAlignment()) {
// cast<> requires the emitted alignment to be an llvm::ConstantInt.
2641 cast
<llvm::ConstantInt
>(CGF
.EmitScalarExpr(AlignmentExpr
));
2642 ClauseAlignment
= AlignmentCI
->getValue();
2644 for (const Expr
*E
: Clause
->varlists()) {
2645 llvm::APInt
Alignment(ClauseAlignment
);
2646 if (Alignment
== 0) {
2647 // OpenMP [2.8.1, Description]
2648 // If no optional parameter is specified, implementation-defined default
2649 // alignments for SIMD instructions on the target platforms are assumed.
2652 .toCharUnitsFromBits(CGF
.getContext().getOpenMPDefaultSimdAlign(
2653 E
->getType()->getPointeeType()))
2656 assert((Alignment
== 0 || Alignment
.isPowerOf2()) &&
2657 "alignment is not power of 2");
2658 llvm::Value
*PtrValue
= CGF
.EmitScalarExpr(E
);
2659 AlignedVars
[PtrValue
] = CGF
.Builder
.getInt64(Alignment
.getSExtValue());
/// Emit code for an OpenMP simd directive \p S.
/// When the OpenMPIRBuilder language option is set and
/// isSupportedByOpenMPIRBuilder(S) holds, the loop is emitted as a canonical
/// loop and simd metadata is attached through OpenMPIRBuilder::applySimd.
/// The remaining visible code emits the directive inline via
/// emitOMPSimdRegion and then checks for an outer lastprivate conditional
/// update.
/// NOTE(review): several lines are elided in this excerpt (declarations of
/// Len, the assignments to Simdlen/Safelen, the early return of the
/// IRBuilder path, closing braces) -- confirm against the full file.
2665 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective
&S
) {
2666 bool UseOMPIRBuilder
=
2667 CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
);
2668 if (UseOMPIRBuilder
) {
2669 auto &&CodeGenIRBuilder
= [this, &S
, UseOMPIRBuilder
](CodeGenFunction
&CGF
,
2670 PrePostActionTy
&) {
2671 // Use the OpenMPIRBuilder if enabled.
// NOTE(review): this lambda is only created inside the enclosing
// `if (UseOMPIRBuilder)` and captures the flag by value, so the check below
// looks redundant.
2672 if (UseOMPIRBuilder
) {
2673 llvm::MapVector
<llvm::Value
*, llvm::Value
*> AlignedVars
=
2674 GetAlignedMapping(S
, CGF
);
2675 // Emit the associated statement and get its loop representation.
2676 const Stmt
*Inner
= S
.getRawStmt();
2677 llvm::CanonicalLoopInfo
*CLI
=
2678 EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
2680 llvm::OpenMPIRBuilder
&OMPBuilder
=
2681 CGM
.getOpenMPRuntime().getOMPBuilder();
2682 // Add SIMD specific metadata
2683 llvm::ConstantInt
*Simdlen
= nullptr;
2684 if (const auto *C
= S
.getSingleClause
<OMPSimdlenClause
>()) {
2686 this->EmitAnyExpr(C
->getSimdlen(), AggValueSlot::ignored(),
2687 /*ignoreResult=*/true);
// cast<> requires the simdlen value to be an llvm::ConstantInt.
2688 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2691 llvm::ConstantInt
*Safelen
= nullptr;
2692 if (const auto *C
= S
.getSingleClause
<OMPSafelenClause
>()) {
2694 this->EmitAnyExpr(C
->getSafelen(), AggValueSlot::ignored(),
2695 /*ignoreResult=*/true);
// cast<> requires the safelen value to be an llvm::ConstantInt.
2696 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
// Order stays OMP_ORDER_unknown unless an order(concurrent) clause is present.
2699 llvm::omp::OrderKind Order
= llvm::omp::OrderKind::OMP_ORDER_unknown
;
2700 if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>()) {
2701 if (C
->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent
) {
2702 Order
= llvm::omp::OrderKind::OMP_ORDER_concurrent
;
2705 // Add simd metadata to the collapsed loop. Do not generate
2706 // another loop for if clause. Support for if clause is done earlier.
2707 OMPBuilder
.applySimd(CLI
, AlignedVars
,
2708 /*IfCond*/ nullptr, Order
, Simdlen
, Safelen
);
2714 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
2715 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
2716 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
,
// Code below does not use the OpenMPIRBuilder: the region is emitted by
// emitOMPSimdRegion as an inlined simd directive.
2722 ParentLoopDirectiveForScanRegion
ScanRegion(*this, S
);
2723 OMPFirstScanLoop
= true;
2724 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
2725 emitOMPSimdRegion(CGF
, S
, Action
);
2729 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
2730 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
2731 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
2733 // Check for outer lastprivate conditional update.
2734 checkForLastprivateConditionalUpdate(*this, S
);
/// Emit code for an OpenMP tile directive \p S by emitting the transformed
/// statement stored on the directive, with an OMPTransformDirectiveScopeRAII
/// object alive for the duration of the emission.
2737 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective
&S
) {
2738 // Emit the de-sugared statement.
2739 OMPTransformDirectiveScopeRAII
TileScope(*this, &S
);
2740 EmitStmt(S
.getTransformedStmt());
/// Emit code for an OpenMP unroll directive \p S.
/// With the OpenMPIRBuilder language option, the associated loop is emitted
/// as a canonical loop and unrolled through the builder (full, partial with a
/// factor, or heuristic). The remaining visible code instead sets unroll
/// state on LoopStack and emits the associated statement.
/// NOTE(review): the else branches, the guard around the push_back, the
/// early return of the IRBuilder path and the second declaration of Factor
/// are elided in this excerpt -- confirm against the full file.
2743 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective
&S
) {
2744 bool UseOMPIRBuilder
= CGM
.getLangOpts().OpenMPIRBuilder
;
2746 if (UseOMPIRBuilder
) {
2747 auto DL
= SourceLocToDebugLoc(S
.getBeginLoc());
2748 const Stmt
*Inner
= S
.getRawStmt();
2750 // Consume nested loop. Clear the entire remaining loop stack because a
2751 // fully unrolled loop is non-transformable. For partial unrolling the
2752 // generated outer loop is pushed back to the stack.
2753 llvm::CanonicalLoopInfo
*CLI
= EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
2754 OMPLoopNestStack
.clear();
2756 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
// The unrolled loop is only requested when ExpectedOMPLoopDepth is at least 1.
2758 bool NeedsUnrolledCLI
= ExpectedOMPLoopDepth
>= 1;
2759 llvm::CanonicalLoopInfo
*UnrolledCLI
= nullptr;
2761 if (S
.hasClausesOfKind
<OMPFullClause
>()) {
2762 assert(ExpectedOMPLoopDepth
== 0);
2763 OMPBuilder
.unrollLoopFull(DL
, CLI
);
2764 } else if (auto *PartialClause
= S
.getSingleClause
<OMPPartialClause
>()) {
// Factor stays 0 when the partial clause carries no factor expression.
2765 uint64_t Factor
= 0;
2766 if (Expr
*FactorExpr
= PartialClause
->getFactor()) {
2767 Factor
= FactorExpr
->EvaluateKnownConstInt(getContext()).getZExtValue();
2768 assert(Factor
>= 1 && "Only positive factors are valid");
2770 OMPBuilder
.unrollLoopPartial(DL
, CLI
, Factor
,
2771 NeedsUnrolledCLI
? &UnrolledCLI
: nullptr);
2773 OMPBuilder
.unrollLoopHeuristic(DL
, CLI
);
2776 assert((!NeedsUnrolledCLI
|| UnrolledCLI
) &&
2777 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2779 OMPLoopNestStack
.push_back(UnrolledCLI
);
2784 // This function is only called if the unrolled loop is not consumed by any
2785 // other loop-associated construct. Such a loop-associated construct will have
2786 // used the transformed AST.
2788 // Set the unroll metadata for the next emitted loop.
2789 LoopStack
.setUnrollState(LoopAttributes::Enable
);
2791 if (S
.hasClausesOfKind
<OMPFullClause
>()) {
2792 LoopStack
.setUnrollState(LoopAttributes::Full
);
2793 } else if (auto *PartialClause
= S
.getSingleClause
<OMPPartialClause
>()) {
2794 if (Expr
*FactorExpr
= PartialClause
->getFactor()) {
2796 FactorExpr
->EvaluateKnownConstInt(getContext()).getZExtValue();
2797 assert(Factor
>= 1 && "Only positive factors are valid");
2798 LoopStack
.setUnrollCount(Factor
);
2802 EmitStmt(S
.getAssociatedStmt());
/// Emit the outer (dispatch) loop that wraps the inner loop of directive
/// \p S, using the blocks "omp.dispatch.cond", "omp.dispatch.body",
/// "omp.dispatch.inc" and "omp.dispatch.end".
/// \param DynamicOrOrdered When false, the bounds are updated with the
///        EUB/Init/NextLB/NextUB expressions from \p LoopArgs and a static
///        finish call is emitted at the end; the visible code also calls
///        the runtime's emitForNext.
/// \param IsMonotonic      Its negation is passed to LoopStack.setParallel
///        for non-simd directives.
/// \param S                Loop directive being emitted.
/// \param LoopScope        Private scope; consulted for pending cleanups.
/// \param LoopArgs         Bound, stride and condition expressions/addresses.
/// \param CodeGenLoop      Callback that emits the inner loop body.
/// \param CodeGenOrdered   Callback invoked as the inner loop's post-increment
///        step.
/// NOTE(review): the else branch around emitForNext, the call receiving the
/// two lambdas, and closing braces are elided in this excerpt.
2805 void CodeGenFunction::EmitOMPOuterLoop(
2806 bool DynamicOrOrdered
, bool IsMonotonic
, const OMPLoopDirective
&S
,
2807 CodeGenFunction::OMPPrivateScope
&LoopScope
,
2808 const CodeGenFunction::OMPLoopArguments
&LoopArgs
,
2809 const CodeGenFunction::CodeGenLoopTy
&CodeGenLoop
,
2810 const CodeGenFunction::CodeGenOrderedTy
&CodeGenOrdered
) {
2811 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
2813 const Expr
*IVExpr
= S
.getIterationVariable();
2814 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
2815 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
2817 JumpDest LoopExit
= getJumpDestInCurrentScope("omp.dispatch.end");
2819 // Start the loop with a block that tests the condition.
2820 llvm::BasicBlock
*CondBlock
= createBasicBlock("omp.dispatch.cond");
2821 EmitBlock(CondBlock
);
2822 const SourceRange R
= S
.getSourceRange();
2823 OMPLoopNestStack
.clear();
2824 LoopStack
.push(CondBlock
, SourceLocToDebugLoc(R
.getBegin()),
2825 SourceLocToDebugLoc(R
.getEnd()));
2827 llvm::Value
*BoolCondVal
= nullptr;
2828 if (!DynamicOrOrdered
) {
2829 // UB = min(UB, GlobalUB) or
2830 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2831 // 'distribute parallel for')
2832 EmitIgnoredExpr(LoopArgs
.EUB
);
2834 EmitIgnoredExpr(LoopArgs
.Init
);
2836 BoolCondVal
= EvaluateExprAsBool(LoopArgs
.Cond
);
2839 RT
.emitForNext(*this, S
.getBeginLoc(), IVSize
, IVSigned
, LoopArgs
.IL
,
2840 LoopArgs
.LB
, LoopArgs
.UB
, LoopArgs
.ST
);
2843 // If there are any cleanups between here and the loop-exit scope,
2844 // create a block to stage a loop exit along.
2845 llvm::BasicBlock
*ExitBlock
= LoopExit
.getBlock();
2846 if (LoopScope
.requiresCleanups())
2847 ExitBlock
= createBasicBlock("omp.dispatch.cleanup");
2849 llvm::BasicBlock
*LoopBody
= createBasicBlock("omp.dispatch.body");
2850 Builder
.CreateCondBr(BoolCondVal
, LoopBody
, ExitBlock
);
// A separate staging block only exists when cleanups are required; it then
// branches to the real exit through the cleanups.
2851 if (ExitBlock
!= LoopExit
.getBlock()) {
2852 EmitBlock(ExitBlock
);
2853 EmitBranchThroughCleanup(LoopExit
);
2855 EmitBlock(LoopBody
);
2857 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2858 // LB for loop condition and emitted it above).
2859 if (DynamicOrOrdered
)
2860 EmitIgnoredExpr(LoopArgs
.Init
);
2862 // Create a block for the increment.
2863 JumpDest Continue
= getJumpDestInCurrentScope("omp.dispatch.inc");
2864 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
// First lambda: loop metadata / simd initialization. Second lambda: the inner
// loop. NOTE(review): the call these lambdas are passed to is elided here.
2868 [&S
, IsMonotonic
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2869 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2870 // with dynamic/guided scheduling and without ordered clause.
2871 if (!isOpenMPSimdDirective(S
.getDirectiveKind())) {
2872 CGF
.LoopStack
.setParallel(!IsMonotonic
);
2873 if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>())
2874 if (C
->getKind() == OMPC_ORDER_concurrent
)
2875 CGF
.LoopStack
.setParallel(/*Enable=*/true);
2877 CGF
.EmitOMPSimdInit(S
);
2880 [&S
, &LoopArgs
, LoopExit
, &CodeGenLoop
, IVSize
, IVSigned
, &CodeGenOrdered
,
2881 &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2882 SourceLocation Loc
= S
.getBeginLoc();
2883 // when 'distribute' is not combined with a 'for':
2884 // while (idx <= UB) { BODY; ++idx; }
2885 // when 'distribute' is combined with a 'for'
2886 // (e.g. 'distribute parallel for')
2887 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2888 CGF
.EmitOMPInnerLoop(
2889 S
, LoopScope
.requiresCleanups(), LoopArgs
.Cond
, LoopArgs
.IncExpr
,
2890 [&S
, LoopExit
, &CodeGenLoop
](CodeGenFunction
&CGF
) {
2891 CodeGenLoop(CGF
, S
, LoopExit
);
2893 [IVSize
, IVSigned
, Loc
, &CodeGenOrdered
](CodeGenFunction
&CGF
) {
2894 CodeGenOrdered(CGF
, Loc
, IVSize
, IVSigned
);
2898 EmitBlock(Continue
.getBlock());
2899 BreakContinueStack
.pop_back();
2900 if (!DynamicOrOrdered
) {
2901 // Emit "LB = LB + Stride", "UB = UB + Stride".
2902 EmitIgnoredExpr(LoopArgs
.NextLB
);
2903 EmitIgnoredExpr(LoopArgs
.NextUB
);
2906 EmitBranch(CondBlock
);
2907 OMPLoopNestStack
.clear();
2909 // Emit the fall-through block.
2910 EmitBlock(LoopExit
.getBlock());
2912 // Tell the runtime we are done.
2913 auto &&CodeGen
= [DynamicOrOrdered
, &S
](CodeGenFunction
&CGF
) {
2914 if (!DynamicOrOrdered
)
2915 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
2916 S
.getDirectiveKind());
2918 OMPCancelStack
.emitExit(*this, S
.getDirectiveKind(), CodeGen
);
/// Emit the outer loop of a worksharing loop directive \p S that needs one:
/// initialize the runtime for either dispatch or static scheduling, then
/// delegate the loop itself to EmitOMPOuterLoop.
/// \param ScheduleKind     Schedule kind and modifiers.
/// \param IsMonotonic      Forwarded to EmitOMPOuterLoop.
/// \param S                Loop directive being emitted.
/// \param LoopScope        Private scope, forwarded to EmitOMPOuterLoop.
/// \param Ordered          True for ordered loops; forces the dispatch path.
/// \param LoopArgs         Bound, stride, is-last and chunk values.
/// \param CGDispatchBounds Callback producing the (LB, UB) values handed to
///        the dispatch initialization.
/// NOTE(review): the else keyword between the two initialization paths, the
/// remaining DispatchRTInput initializer and the guard inside CodeGenOrdered
/// are elided in this excerpt.
2921 void CodeGenFunction::EmitOMPForOuterLoop(
2922 const OpenMPScheduleTy
&ScheduleKind
, bool IsMonotonic
,
2923 const OMPLoopDirective
&S
, OMPPrivateScope
&LoopScope
, bool Ordered
,
2924 const OMPLoopArguments
&LoopArgs
,
2925 const CodeGenDispatchBoundsTy
&CGDispatchBounds
) {
2926 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
2928 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2929 const bool DynamicOrOrdered
= Ordered
|| RT
.isDynamic(ScheduleKind
.Schedule
);
2931 assert((Ordered
|| !RT
.isStaticNonchunked(ScheduleKind
.Schedule
,
2932 LoopArgs
.Chunk
!= nullptr)) &&
2933 "static non-chunked schedule does not need outer loop");
2937 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2938 // When schedule(dynamic,chunk_size) is specified, the iterations are
2939 // distributed to threads in the team in chunks as the threads request them.
2940 // Each thread executes a chunk of iterations, then requests another chunk,
2941 // until no chunks remain to be distributed. Each chunk contains chunk_size
2942 // iterations, except for the last chunk to be distributed, which may have
2943 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2945 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2946 // to threads in the team in chunks as the executing threads request them.
2947 // Each thread executes a chunk of iterations, then requests another chunk,
2948 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2949 // each chunk is proportional to the number of unassigned iterations divided
2950 // by the number of threads in the team, decreasing to 1. For a chunk_size
2951 // with value k (greater than 1), the size of each chunk is determined in the
2952 // same way, with the restriction that the chunks do not contain fewer than k
2953 // iterations (except for the last chunk to be assigned, which may have fewer
2954 // than k iterations).
2956 // When schedule(auto) is specified, the decision regarding scheduling is
2957 // delegated to the compiler and/or runtime system. The programmer gives the
2958 // implementation the freedom to choose any possible mapping of iterations to
2959 // threads in the team.
2961 // When schedule(runtime) is specified, the decision regarding scheduling is
2962 // deferred until run time, and the schedule and chunk size are taken from the
2963 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2964 // implementation defined
2966 // while(__kmpc_dispatch_next(&LB, &UB)) {
2968 // while (idx <= UB) { BODY; ++idx;
2969 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2973 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2974 // When schedule(static, chunk_size) is specified, iterations are divided into
2975 // chunks of size chunk_size, and the chunks are assigned to the threads in
2976 // the team in a round-robin fashion in the order of the thread number.
2978 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2979 // while (idx <= UB) { BODY; ++idx; } // inner loop
2985 const Expr
*IVExpr
= S
.getIterationVariable();
2986 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
2987 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
// Dispatch path: bounds come from the CGDispatchBounds callback.
2989 if (DynamicOrOrdered
) {
2990 const std::pair
<llvm::Value
*, llvm::Value
*> DispatchBounds
=
2991 CGDispatchBounds(*this, S
, LoopArgs
.LB
, LoopArgs
.UB
);
2992 llvm::Value
*LBVal
= DispatchBounds
.first
;
2993 llvm::Value
*UBVal
= DispatchBounds
.second
;
2994 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues
= {LBVal
, UBVal
,
2996 RT
.emitForDispatchInit(*this, S
.getBeginLoc(), ScheduleKind
, IVSize
,
2997 IVSigned
, Ordered
, DipatchRTInputValues
);
// Static path: the runtime is initialized from the LoopArgs addresses.
2999 CGOpenMPRuntime::StaticRTInput
StaticInit(
3000 IVSize
, IVSigned
, Ordered
, LoopArgs
.IL
, LoopArgs
.LB
, LoopArgs
.UB
,
3001 LoopArgs
.ST
, LoopArgs
.Chunk
);
3002 RT
.emitForStaticInit(*this, S
.getBeginLoc(), S
.getDirectiveKind(),
3003 ScheduleKind
, StaticInit
);
// Callback run after each inner-loop increment; it reports the end of an
// ordered iteration to the runtime.
3006 auto &&CodeGenOrdered
= [Ordered
](CodeGenFunction
&CGF
, SourceLocation Loc
,
3007 const unsigned IVSize
,
3008 const bool IVSigned
) {
3010 CGF
.CGM
.getOpenMPRuntime().emitForOrderedIterationEnd(CGF
, Loc
, IVSize
,
3015 OMPLoopArguments
OuterLoopArgs(LoopArgs
.LB
, LoopArgs
.UB
, LoopArgs
.ST
,
3016 LoopArgs
.IL
, LoopArgs
.Chunk
, LoopArgs
.EUB
);
3017 OuterLoopArgs
.IncExpr
= S
.getInc();
3018 OuterLoopArgs
.Init
= S
.getInit();
3019 OuterLoopArgs
.Cond
= S
.getCond();
3020 OuterLoopArgs
.NextLB
= S
.getNextLowerBound();
3021 OuterLoopArgs
.NextUB
= S
.getNextUpperBound();
3022 EmitOMPOuterLoop(DynamicOrOrdered
, IsMonotonic
, S
, LoopScope
, OuterLoopArgs
,
3023 emitOMPLoopBodyWithStopPoint
, CodeGenOrdered
);
/// No-op callback with the same parameter list that EmitOMPOuterLoop uses when
/// invoking its CodeGenOrdered callback; all arguments are ignored.
3026 static void emitEmptyOrdered(CodeGenFunction
&, SourceLocation Loc
,
3027 const unsigned IVSize
, const bool IVSigned
) {}
/// Emit the outer loop of a distribute-kind directive \p S: initialize the
/// runtime with emitDistributeStaticInit, select either the combined or the
/// plain loop expressions of \p S, and delegate to EmitOMPOuterLoop with
/// DynamicOrOrdered and IsMonotonic both false.
/// \param ScheduleKind       dist_schedule kind passed to the runtime.
/// \param S                  Loop directive being emitted.
/// \param LoopScope          Private scope, forwarded to EmitOMPOuterLoop.
/// \param LoopArgs           Bound, stride, is-last and chunk values.
/// \param CodeGenLoopContent Callback that emits the inner loop body.
/// NOTE(review): the declaration of IncExpr, the else keyword, the ':' arms
/// of the Init/Cond selections and the last EmitOMPOuterLoop argument are
/// elided in this excerpt.
3029 void CodeGenFunction::EmitOMPDistributeOuterLoop(
3030 OpenMPDistScheduleClauseKind ScheduleKind
, const OMPLoopDirective
&S
,
3031 OMPPrivateScope
&LoopScope
, const OMPLoopArguments
&LoopArgs
,
3032 const CodeGenLoopTy
&CodeGenLoopContent
) {
3034 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
3037 // Same behavior as an OMPForOuterLoop, except that schedule cannot be
3041 const Expr
*IVExpr
= S
.getIterationVariable();
3042 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
3043 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
3045 CGOpenMPRuntime::StaticRTInput
StaticInit(
3046 IVSize
, IVSigned
, /* Ordered = */ false, LoopArgs
.IL
, LoopArgs
.LB
,
3047 LoopArgs
.UB
, LoopArgs
.ST
, LoopArgs
.Chunk
);
3048 RT
.emitDistributeStaticInit(*this, S
.getBeginLoc(), ScheduleKind
, StaticInit
);
3050 // for combined 'distribute' and 'for' the increment expression of distribute
3051 // is stored in DistInc. For 'distribute' alone, it is in Inc.
3053 if (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind()))
3054 IncExpr
= S
.getDistInc();
3056 IncExpr
= S
.getInc();
3058 // this routine is shared by 'omp distribute parallel for' and
3059 // 'omp distribute': select the right EUB expression depending on the
// Each field below uses the "combined" expression of S for loop-bound-sharing
// directives.
3061 OMPLoopArguments OuterLoopArgs
;
3062 OuterLoopArgs
.LB
= LoopArgs
.LB
;
3063 OuterLoopArgs
.UB
= LoopArgs
.UB
;
3064 OuterLoopArgs
.ST
= LoopArgs
.ST
;
3065 OuterLoopArgs
.IL
= LoopArgs
.IL
;
3066 OuterLoopArgs
.Chunk
= LoopArgs
.Chunk
;
3067 OuterLoopArgs
.EUB
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3068 ? S
.getCombinedEnsureUpperBound()
3069 : S
.getEnsureUpperBound();
3070 OuterLoopArgs
.IncExpr
= IncExpr
;
3071 OuterLoopArgs
.Init
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3072 ? S
.getCombinedInit()
3074 OuterLoopArgs
.Cond
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3075 ? S
.getCombinedCond()
3077 OuterLoopArgs
.NextLB
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3078 ? S
.getCombinedNextLowerBound()
3079 : S
.getNextLowerBound();
3080 OuterLoopArgs
.NextUB
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3081 ? S
.getCombinedNextUpperBound()
3082 : S
.getNextUpperBound();
3084 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S
,
3085 LoopScope
, OuterLoopArgs
, CodeGenLoopContent
,
/// Emit the lower/upper bound helper variables of the loop directive \p S and
/// store into them the previous-schedule bounds, converted to the iteration
/// variable's type.
/// \param CGF Function being emitted into.
/// \param S   Executable directive; must be an OMPLoopDirective (cast<>).
/// NOTE(review): the declarations of LB and UB and the return statement are
/// elided in this excerpt; presumably the pair (LB, UB) is returned -- confirm
/// against the full file.
3089 static std::pair
<LValue
, LValue
>
3090 emitDistributeParallelForInnerBounds(CodeGenFunction
&CGF
,
3091 const OMPExecutableDirective
&S
) {
3092 const OMPLoopDirective
&LS
= cast
<OMPLoopDirective
>(S
);
3094 EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(LS
.getLowerBoundVariable()));
3096 EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(LS
.getUpperBoundVariable()));
3098 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3099 // parallel for') we need to use the 'distribute'
3100 // chunk lower and upper bounds rather than the whole loop iteration
3101 // space. These are parameters to the outlined function for 'parallel'
3102 // and we copy the bounds of the previous schedule into the
3103 // current ones.
3104 LValue PrevLB
= CGF
.EmitLValue(LS
.getPrevLowerBoundVariable());
3105 LValue PrevUB
= CGF
.EmitLValue(LS
.getPrevUpperBoundVariable());
// Load each previous bound and convert it from its own type to the type of
// the iteration variable before storing.
3106 llvm::Value
*PrevLBVal
= CGF
.EmitLoadOfScalar(
3107 PrevLB
, LS
.getPrevLowerBoundVariable()->getExprLoc());
3108 PrevLBVal
= CGF
.EmitScalarConversion(
3109 PrevLBVal
, LS
.getPrevLowerBoundVariable()->getType(),
3110 LS
.getIterationVariable()->getType(),
3111 LS
.getPrevLowerBoundVariable()->getExprLoc());
3112 llvm::Value
*PrevUBVal
= CGF
.EmitLoadOfScalar(
3113 PrevUB
, LS
.getPrevUpperBoundVariable()->getExprLoc());
3114 PrevUBVal
= CGF
.EmitScalarConversion(
3115 PrevUBVal
, LS
.getPrevUpperBoundVariable()->getType(),
3116 LS
.getIterationVariable()->getType(),
3117 LS
.getPrevUpperBoundVariable()->getExprLoc());
3119 CGF
.EmitStoreOfScalar(PrevLBVal
, LB
);
3120 CGF
.EmitStoreOfScalar(PrevUBVal
, UB
);
/// Load the values stored at \p LB and \p UB, typed as the iteration variable
/// of the loop directive \p S, and return them as a (lower, upper) pair.
/// NOTE(review): the opening lines of the original doc comment below are
/// elided in this excerpt, so it starts mid-sentence.
3125 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3126 /// we need to use the LB and UB expressions generated by the worksharing
3127 /// code generation support, whereas in non combined situations we would
3128 /// just emit 0 and the LastIteration expression
3129 /// This function is necessary due to the difference of the LB and UB
3130 /// types for the RT emission routines for 'for_static_init' and
3131 /// 'for_dispatch_init'
3132 static std::pair
<llvm::Value
*, llvm::Value
*>
3133 emitDistributeParallelForDispatchBounds(CodeGenFunction
&CGF
,
3134 const OMPExecutableDirective
&S
,
3135 Address LB
, Address UB
) {
3136 const OMPLoopDirective
&LS
= cast
<OMPLoopDirective
>(S
);
3137 const Expr
*IVExpr
= LS
.getIterationVariable();
3138 // when implementing a dynamic schedule for a 'for' combined with a
3139 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3140 // is not normalized as each team only executes its own assigned
3142 QualType IteratorTy
= IVExpr
->getType();
3143 llvm::Value
*LBVal
=
3144 CGF
.EmitLoadOfScalar(LB
, /*Volatile=*/false, IteratorTy
, S
.getBeginLoc());
3145 llvm::Value
*UBVal
=
3146 CGF
.EmitLoadOfScalar(UB
, /*Volatile=*/false, IteratorTy
, S
.getBeginLoc());
3147 return {LBVal
, UBVal
};
/// Append the combined lower and upper bound of the loop directive \p S to
/// \p CapturedVars, each loaded and integer-cast (unsigned) to CGF.SizeTy.
/// The lower bound is pushed first, then the upper bound.
/// \param CGF          Function being emitted into.
/// \param S            Executable directive; must be an OMPLoopDirective.
/// \param CapturedVars Output list that receives the two values.
/// NOTE(review): the declarations of LB and UB are elided in this excerpt.
3150 static void emitDistributeParallelForDistributeInnerBoundParams(
3151 CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
,
3152 llvm::SmallVectorImpl
<llvm::Value
*> &CapturedVars
) {
3153 const auto &Dir
= cast
<OMPLoopDirective
>(S
);
3155 CGF
.EmitLValue(cast
<DeclRefExpr
>(Dir
.getCombinedLowerBoundVariable()));
3156 llvm::Value
*LBCast
=
3157 CGF
.Builder
.CreateIntCast(CGF
.Builder
.CreateLoad(LB
.getAddress(CGF
)),
3158 CGF
.SizeTy
, /*isSigned=*/false);
3159 CapturedVars
.push_back(LBCast
);
3161 CGF
.EmitLValue(cast
<DeclRefExpr
>(Dir
.getCombinedUpperBoundVariable()));
3163 llvm::Value
*UBCast
=
3164 CGF
.Builder
.CreateIntCast(CGF
.Builder
.CreateLoad(UB
.getAddress(CGF
)),
3165 CGF
.SizeTy
, /*isSigned=*/false);
3166 CapturedVars
.push_back(UBCast
);
/// Emit the inner 'parallel for' part of a combined distribute directive
/// \p S: a worksharing loop (with cancellation tracking) wrapped in a common
/// parallel directive whose kind is OMPD_for_simd for simd directives and
/// OMPD_for otherwise.
/// \param CGF      Function being emitted into.
/// \param S        Combined loop directive.
/// \param LoopExit Jump destination supplied by the caller; not referenced in
///        the visible lines.
/// NOTE(review): the return-type line, the remaining OMPCancelStackRAII
/// argument and the leading arguments of emitCommonOMPParallelDirective are
/// elided in this excerpt.
3170 emitInnerParallelForWhenCombined(CodeGenFunction
&CGF
,
3171 const OMPLoopDirective
&S
,
3172 CodeGenFunction::JumpDest LoopExit
) {
3173 auto &&CGInlinedWorksharingLoop
= [&S
](CodeGenFunction
&CGF
,
3174 PrePostActionTy
&Action
) {
// HasCancel is only queried for non-simd directives, and only for the three
// directive classes tested below; otherwise it stays false.
3176 bool HasCancel
= false;
3177 if (!isOpenMPSimdDirective(S
.getDirectiveKind())) {
3178 if (const auto *D
= dyn_cast
<OMPTeamsDistributeParallelForDirective
>(&S
))
3179 HasCancel
= D
->hasCancel();
3180 else if (const auto *D
= dyn_cast
<OMPDistributeParallelForDirective
>(&S
))
3181 HasCancel
= D
->hasCancel();
3182 else if (const auto *D
=
3183 dyn_cast
<OMPTargetTeamsDistributeParallelForDirective
>(&S
))
3184 HasCancel
= D
->hasCancel();
3186 CodeGenFunction::OMPCancelStackRAII
CancelRegion(CGF
, S
.getDirectiveKind(),
3188 CGF
.EmitOMPWorksharingLoop(S
, S
.getPrevEnsureUpperBound(),
3189 emitDistributeParallelForInnerBounds
,
3190 emitDistributeParallelForDispatchBounds
);
3193 emitCommonOMPParallelDirective(
3195 isOpenMPSimdDirective(S
.getDirectiveKind()) ? OMPD_for_simd
: OMPD_for
,
3196 CGInlinedWorksharingLoop
,
3197 emitDistributeParallelForDistributeInnerBoundParams
);
/// Emit code for a 'distribute parallel for' directive \p S: the distribute
/// loop is emitted inline as an OMPD_distribute region, with
/// emitInnerParallelForWhenCombined generating the loop content.
/// NOTE(review): the last EmitOMPDistributeLoop argument is elided in this
/// excerpt.
3200 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3201 const OMPDistributeParallelForDirective
&S
) {
3202 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3203 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
3206 OMPLexicalScope
Scope(*this, S
, OMPD_parallel
);
3207 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute
, CodeGen
);
/// Emit code for a 'distribute parallel for simd' directive \p S: the
/// distribute loop is emitted inline as an OMPD_distribute region, with
/// emitInnerParallelForWhenCombined generating the loop content.
/// NOTE(review): the last EmitOMPDistributeLoop argument is elided in this
/// excerpt.
3210 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3211 const OMPDistributeParallelForSimdDirective
&S
) {
3212 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3213 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
3216 OMPLexicalScope
Scope(*this, S
, OMPD_parallel
);
3217 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute
, CodeGen
);
/// Emit code for a 'distribute simd' directive \p S: the distribute loop is
/// emitted inline as an OMPD_simd region, using emitOMPLoopBodyWithStopPoint
/// for the body and S.getInc() as the increment expression.
3220 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3221 const OMPDistributeSimdDirective
&S
) {
3222 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3223 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
3225 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3226 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
/// Emit the outlined target function for a 'target simd' directive \p S as an
/// offload entry; the region body is produced by emitOMPSimdRegion.
/// \param CGM        Module the function is emitted into.
/// \param ParentName Name passed through to emitTargetOutlinedFunction.
/// \param S          Target simd directive.
/// NOTE(review): the declaration of Fn is elided in this excerpt.
3229 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3230 CodeGenModule
&CGM
, StringRef ParentName
, const OMPTargetSimdDirective
&S
) {
3231 // Emit the target simd region as a standalone region.
3232 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
3233 emitOMPSimdRegion(CGF
, S
, Action
);
3236 llvm::Constant
*Addr
;
3237 // Emit target region as a standalone region.
3238 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
3239 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
3240 assert(Fn
&& Addr
&& "Target device function emission failed.");
/// Emit code for a 'target simd' directive \p S by handing a region generator
/// based on emitOMPSimdRegion to emitCommonOMPTargetDirective.
3243 void CodeGenFunction::EmitOMPTargetSimdDirective(
3244 const OMPTargetSimdDirective
&S
) {
3245 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
3246 emitOMPSimdRegion(CGF
, S
, Action
);
3248 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
/// Plain aggregate bundling a schedule clause kind with its two schedule
/// modifiers; the constructor copies its three arguments into the members.
3252 struct ScheduleKindModifiersTy
{
3253 OpenMPScheduleClauseKind Kind
;
3254 OpenMPScheduleClauseModifier M1
;
3255 OpenMPScheduleClauseModifier M2
;
3256 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind
,
3257 OpenMPScheduleClauseModifier M1
,
3258 OpenMPScheduleClauseModifier M2
)
3259 : Kind(Kind
), M1(M1
), M2(M2
) {}
3263 bool CodeGenFunction::EmitOMPWorksharingLoop(
3264 const OMPLoopDirective
&S
, Expr
*EUB
,
3265 const CodeGenLoopBoundsTy
&CodeGenLoopBounds
,
3266 const CodeGenDispatchBoundsTy
&CGDispatchBounds
) {
3267 // Emit the loop iteration variable.
3268 const auto *IVExpr
= cast
<DeclRefExpr
>(S
.getIterationVariable());
3269 const auto *IVDecl
= cast
<VarDecl
>(IVExpr
->getDecl());
3270 EmitVarDecl(*IVDecl
);
3272 // Emit the iterations count variable.
3273 // If it is not a variable, Sema decided to calculate iterations count on each
3274 // iteration (e.g., it is foldable into a constant).
3275 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
3276 EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
3277 // Emit calculation of the iterations count.
3278 EmitIgnoredExpr(S
.getCalcLastIteration());
3281 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
3283 bool HasLastprivateClause
;
3284 // Check pre-condition.
3286 OMPLoopScope
PreInitScope(*this, S
);
3287 // Skip the entire loop if we don't meet the precondition.
3288 // If the condition constant folds and can be elided, avoid emitting the
3291 llvm::BasicBlock
*ContBlock
= nullptr;
3292 if (ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
3296 llvm::BasicBlock
*ThenBlock
= createBasicBlock("omp.precond.then");
3297 ContBlock
= createBasicBlock("omp.precond.end");
3298 emitPreCond(*this, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
3299 getProfileCount(&S
));
3300 EmitBlock(ThenBlock
);
3301 incrementProfileCounter(&S
);
3304 RunCleanupsScope
DoacrossCleanupScope(*this);
3305 bool Ordered
= false;
3306 if (const auto *OrderedClause
= S
.getSingleClause
<OMPOrderedClause
>()) {
3307 if (OrderedClause
->getNumForLoops())
3308 RT
.emitDoacrossInit(*this, S
, OrderedClause
->getLoopNumIterations());
3313 llvm::DenseSet
<const Expr
*> EmittedFinals
;
3314 emitAlignedClause(*this, S
);
3315 bool HasLinears
= EmitOMPLinearClauseInit(S
);
3316 // Emit helper vars inits.
3318 std::pair
<LValue
, LValue
> Bounds
= CodeGenLoopBounds(*this, S
);
3319 LValue LB
= Bounds
.first
;
3320 LValue UB
= Bounds
.second
;
3322 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getStrideVariable()));
3324 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()));
3326 // Emit 'then' code.
3328 OMPPrivateScope
LoopScope(*this);
3329 if (EmitOMPFirstprivateClause(S
, LoopScope
) || HasLinears
) {
3330 // Emit implicit barrier to synchronize threads and avoid data races on
3331 // initialization of firstprivate variables and post-update of
3332 // lastprivate variables.
3333 CGM
.getOpenMPRuntime().emitBarrierCall(
3334 *this, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
3335 /*ForceSimpleCall=*/true);
3337 EmitOMPPrivateClause(S
, LoopScope
);
3338 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(
3339 *this, S
, EmitLValue(S
.getIterationVariable()));
3340 HasLastprivateClause
= EmitOMPLastprivateClauseInit(S
, LoopScope
);
3341 EmitOMPReductionClauseInit(S
, LoopScope
);
3342 EmitOMPPrivateLoopCounters(S
, LoopScope
);
3343 EmitOMPLinearClause(S
, LoopScope
);
3344 (void)LoopScope
.Privatize();
3345 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
3346 CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S
);
3348 // Detect the loop schedule kind and chunk.
3349 const Expr
*ChunkExpr
= nullptr;
3350 OpenMPScheduleTy ScheduleKind
;
3351 if (const auto *C
= S
.getSingleClause
<OMPScheduleClause
>()) {
3352 ScheduleKind
.Schedule
= C
->getScheduleKind();
3353 ScheduleKind
.M1
= C
->getFirstScheduleModifier();
3354 ScheduleKind
.M2
= C
->getSecondScheduleModifier();
3355 ChunkExpr
= C
->getChunkSize();
3357 // Default behaviour for schedule clause.
3358 CGM
.getOpenMPRuntime().getDefaultScheduleAndChunk(
3359 *this, S
, ScheduleKind
.Schedule
, ChunkExpr
);
3361 bool HasChunkSizeOne
= false;
3362 llvm::Value
*Chunk
= nullptr;
3364 Chunk
= EmitScalarExpr(ChunkExpr
);
3365 Chunk
= EmitScalarConversion(Chunk
, ChunkExpr
->getType(),
3366 S
.getIterationVariable()->getType(),
3368 Expr::EvalResult Result
;
3369 if (ChunkExpr
->EvaluateAsInt(Result
, getContext())) {
3370 llvm::APSInt EvaluatedChunk
= Result
.Val
.getInt();
3371 HasChunkSizeOne
= (EvaluatedChunk
.getLimitedValue() == 1);
3374 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
3375 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
3376 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3377 // If the static schedule kind is specified or if the ordered clause is
3378 // specified, and if no monotonic modifier is specified, the effect will
3379 // be as if the monotonic modifier was specified.
3380 bool StaticChunkedOne
=
3381 RT
.isStaticChunked(ScheduleKind
.Schedule
,
3382 /* Chunked */ Chunk
!= nullptr) &&
3384 isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind());
3387 (ScheduleKind
.Schedule
== OMPC_SCHEDULE_static
&&
3388 !(ScheduleKind
.M1
== OMPC_SCHEDULE_MODIFIER_nonmonotonic
||
3389 ScheduleKind
.M2
== OMPC_SCHEDULE_MODIFIER_nonmonotonic
)) ||
3390 ScheduleKind
.M1
== OMPC_SCHEDULE_MODIFIER_monotonic
||
3391 ScheduleKind
.M2
== OMPC_SCHEDULE_MODIFIER_monotonic
;
3392 if ((RT
.isStaticNonchunked(ScheduleKind
.Schedule
,
3393 /* Chunked */ Chunk
!= nullptr) ||
3394 StaticChunkedOne
) &&
3397 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3400 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3401 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
3402 CGF
.EmitOMPSimdInit(S
);
3403 } else if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>()) {
3404 if (C
->getKind() == OMPC_ORDER_concurrent
)
3405 CGF
.LoopStack
.setParallel(/*Enable=*/true);
3408 [IVSize
, IVSigned
, Ordered
, IL
, LB
, UB
, ST
, StaticChunkedOne
, Chunk
,
3409 &S
, ScheduleKind
, LoopExit
,
3410 &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3411 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3412 // When no chunk_size is specified, the iteration space is divided
3413 // into chunks that are approximately equal in size, and at most
3414 // one chunk is distributed to each thread. Note that the size of
3415 // the chunks is unspecified in this case.
3416 CGOpenMPRuntime::StaticRTInput
StaticInit(
3417 IVSize
, IVSigned
, Ordered
, IL
.getAddress(CGF
),
3418 LB
.getAddress(CGF
), UB
.getAddress(CGF
), ST
.getAddress(CGF
),
3419 StaticChunkedOne
? Chunk
: nullptr);
3420 CGF
.CGM
.getOpenMPRuntime().emitForStaticInit(
3421 CGF
, S
.getBeginLoc(), S
.getDirectiveKind(), ScheduleKind
,
3423 // UB = min(UB, GlobalUB);
3424 if (!StaticChunkedOne
)
3425 CGF
.EmitIgnoredExpr(S
.getEnsureUpperBound());
3427 CGF
.EmitIgnoredExpr(S
.getInit());
3428 // For unchunked static schedule generate:
3430 // while (idx <= UB) {
3435 // For static schedule with chunk one:
3437 // while (IV <= PrevUB) {
3441 CGF
.EmitOMPInnerLoop(
3442 S
, LoopScope
.requiresCleanups(),
3443 StaticChunkedOne
? S
.getCombinedParForInDistCond()
3445 StaticChunkedOne
? S
.getDistInc() : S
.getInc(),
3446 [&S
, LoopExit
](CodeGenFunction
&CGF
) {
3447 emitOMPLoopBodyWithStopPoint(CGF
, S
, LoopExit
);
3449 [](CodeGenFunction
&) {});
3451 EmitBlock(LoopExit
.getBlock());
3452 // Tell the runtime we are done.
3453 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
) {
3454 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
3455 S
.getDirectiveKind());
3457 OMPCancelStack
.emitExit(*this, S
.getDirectiveKind(), CodeGen
);
3459 // Emit the outer loop, which requests its work chunk [LB..UB] from
3460 // runtime and runs the inner loop to process it.
3461 const OMPLoopArguments
LoopArguments(
3462 LB
.getAddress(*this), UB
.getAddress(*this), ST
.getAddress(*this),
3463 IL
.getAddress(*this), Chunk
, EUB
);
3464 EmitOMPForOuterLoop(ScheduleKind
, IsMonotonic
, S
, LoopScope
, Ordered
,
3465 LoopArguments
, CGDispatchBounds
);
3467 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
3468 EmitOMPSimdFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3469 return CGF
.Builder
.CreateIsNotNull(
3470 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3473 EmitOMPReductionClauseFinal(
3474 S
, /*ReductionKind=*/isOpenMPSimdDirective(S
.getDirectiveKind())
3475 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3476 : /*Parallel only*/ OMPD_parallel
);
3477 // Emit post-update of the reduction variables if IsLastIter != 0.
3478 emitPostUpdateForReductionClause(
3479 *this, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3480 return CGF
.Builder
.CreateIsNotNull(
3481 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3483 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3484 if (HasLastprivateClause
)
3485 EmitOMPLastprivateClauseFinal(
3486 S
, isOpenMPSimdDirective(S
.getDirectiveKind()),
3487 Builder
.CreateIsNotNull(EmitLoadOfScalar(IL
, S
.getBeginLoc())));
3488 LoopScope
.restoreMap();
3489 EmitOMPLinearClauseFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3490 return CGF
.Builder
.CreateIsNotNull(
3491 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3494 DoacrossCleanupScope
.ForceCleanup();
3495 // We're now done with the loop, so jump to the continuation block.
3497 EmitBranch(ContBlock
);
3498 EmitBlock(ContBlock
, /*IsFinished=*/true);
3501 return HasLastprivateClause
;
3504 /// The following two functions generate expressions for the loop lower
3505 /// and upper bounds in case of static and dynamic (dispatch) schedule
3506 /// of the associated 'for' or 'distribute' loop.
/// Emits the lower-bound and upper-bound helper variables of the loop
/// directive \p S (which must be an OMPLoopDirective, enforced by cast<>) by
/// calling EmitOMPHelperVar on each of them. The declared return type is a
/// pair of LValues.
/// NOTE(review): the lines that bind the two EmitOMPHelperVar results and the
/// return statement are not visible in this listing; presumably the pair is
/// {lower bound, upper bound} - confirm against the full file.
3507 static std::pair
<LValue
, LValue
>
3508 emitForLoopBounds(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
3509 const auto &LS
= cast
<OMPLoopDirective
>(S
);
// Helper variable holding the loop's lower bound.
3511 EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(LS
.getLowerBoundVariable()));
// Helper variable holding the loop's upper bound.
3513 EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(LS
.getUpperBoundVariable()));
3517 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3518 /// consider the lower and upper bound expressions generated by the
3519 /// worksharing loop support, but we use 0 and the iteration space size as
3521 static std::pair
<llvm::Value
*, llvm::Value
*>
3522 emitDispatchForLoopBounds(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
,
3523 Address LB
, Address UB
) {
3524 const auto &LS
= cast
<OMPLoopDirective
>(S
);
3525 const Expr
*IVExpr
= LS
.getIterationVariable();
3526 const unsigned IVSize
= CGF
.getContext().getTypeSize(IVExpr
->getType());
3527 llvm::Value
*LBVal
= CGF
.Builder
.getIntN(IVSize
, 0);
3528 llvm::Value
*UBVal
= CGF
.EmitScalarExpr(LS
.getLastIteration());
3529 return {LBVal
, UBVal
};
3532 /// Emits internal temp array declarations for the directive with inscan
3534 /// The code is the following:
3536 /// size num_iters = <num_iters>;
3537 /// <type> buffer[num_iters];
/// Emits the temporary buffer declarations for a directive with inscan
/// reductions. The iteration count produced by \p NumIteratorsGen is cast to
/// CGF.SizeTy (unsigned), the shared/private/reduction-op/copy-array-temp
/// expressions of every reduction clause on \p S are collected (each clause is
/// asserted to use the inscan modifier), and for each private copy the
/// matching copy-array temp is emitted with its array dimension mapped to that
/// iteration count.
/// NOTE(review): several lines of this body are not visible in this listing
/// (e.g. the definition and update of `Count`, the advance of `ITA`, and part
/// of the DimMapping argument list); confirm details against the full file.
3539 static void emitScanBasedDirectiveDecls(
3540 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3541 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
) {
3542 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3543 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3544 SmallVector
<const Expr
*, 4> Shareds
;
3545 SmallVector
<const Expr
*, 4> Privates
;
3546 SmallVector
<const Expr
*, 4> ReductionOps
;
3547 SmallVector
<const Expr
*, 4> CopyArrayTemps
;
// Flatten the per-clause expression lists into parallel vectors.
3548 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3549 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3550 "Only inscan reductions are expected.");
3551 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
3552 Privates
.append(C
->privates().begin(), C
->privates().end());
3553 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
3554 CopyArrayTemps
.append(C
->copy_array_temps().begin(),
3555 C
->copy_array_temps().end());
3558 // Emit buffers for each reduction variables.
3559 // ReductionCodeGen is required to emit correctly the code for array
// Note: Shareds is passed for both of the first two constructor arguments.
3561 ReductionCodeGen
RedCG(Shareds
, Shareds
, Privates
, ReductionOps
);
3563 auto *ITA
= CopyArrayTemps
.begin();
3564 for (const Expr
*IRef
: Privates
) {
3565 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IRef
)->getDecl());
3566 // Emit variably modified arrays, used for arrays/array sections
3568 if (PrivateVD
->getType()->isVariablyModifiedType()) {
3569 RedCG
.emitSharedOrigLValue(CGF
, Count
);
3570 RedCG
.emitAggregateType(CGF
, Count
);
// Bind the temp array's dimension to the number of iterations.
3572 CodeGenFunction::OpaqueValueMapping
DimMapping(
3574 cast
<OpaqueValueExpr
>(
3575 cast
<VariableArrayType
>((*ITA
)->getType()->getAsArrayTypeUnsafe())
3577 RValue::get(OMPScanNumIterations
));
3578 // Emit temp buffer.
3579 CGF
.EmitVarDecl(*cast
<VarDecl
>(cast
<DeclRefExpr
>(*ITA
)->getDecl()));
3586 /// Copies final inscan reductions values to the original variables.
3587 /// The code is the following:
3589 /// <orig_var> = buffer[num_iters-1];
/// Copies the final inscan reduction values back to the original variables.
/// The iteration count from \p NumIteratorsGen is cast to CGF.SizeTy, the
/// index `count - 1` is computed, and for every copy-array element the buffer
/// element at that index is copied into the corresponding original (shared)
/// variable through EmitOMPCopy.
/// NOTE(review): the subtraction is emitted unconditionally in the visible
/// code; behaviour for a zero iteration count is not guarded here - confirm
/// whether callers guarantee at least one iteration.
/// NOTE(review): a few lines (e.g. the first OpaqueValueMapping argument and
/// the last EmitOMPCopy argument) are not visible in this listing.
3591 static void emitScanBasedDirectiveFinals(
3592 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3593 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
) {
3594 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3595 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3596 SmallVector
<const Expr
*, 4> Shareds
;
3597 SmallVector
<const Expr
*, 4> LHSs
;
3598 SmallVector
<const Expr
*, 4> RHSs
;
3599 SmallVector
<const Expr
*, 4> Privates
;
3600 SmallVector
<const Expr
*, 4> CopyOps
;
3601 SmallVector
<const Expr
*, 4> CopyArrayElems
;
// Flatten the per-clause expression lists into parallel vectors.
3602 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3603 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3604 "Only inscan reductions are expected.");
3605 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
3606 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
3607 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
3608 Privates
.append(C
->privates().begin(), C
->privates().end());
3609 CopyOps
.append(C
->copy_ops().begin(), C
->copy_ops().end());
3610 CopyArrayElems
.append(C
->copy_array_elems().begin(),
3611 C
->copy_array_elems().end());
3613 // Create temp var and copy LHS value to this temp value.
3614 // LHS = TMP[LastIter];
// Index of the last buffer element: number of iterations minus one.
3615 llvm::Value
*OMPLast
= CGF
.Builder
.CreateNSWSub(
3616 OMPScanNumIterations
,
3617 llvm::ConstantInt::get(CGF
.SizeTy
, 1, /*isSigned=*/false));
3618 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
3619 const Expr
*PrivateExpr
= Privates
[I
];
3620 const Expr
*OrigExpr
= Shareds
[I
];
3621 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
// Make the subscript of the copy-array element evaluate to OMPLast.
3622 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3624 cast
<OpaqueValueExpr
>(
3625 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3626 RValue::get(OMPLast
));
3627 LValue DestLVal
= CGF
.EmitLValue(OrigExpr
);
3628 LValue SrcLVal
= CGF
.EmitLValue(CopyArrayElem
);
3629 CGF
.EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(CGF
),
3630 SrcLVal
.getAddress(CGF
),
3631 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
3632 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
3637 /// Emits the code for the directive with inscan reductions.
3638 /// The code is the following:
3641 /// for (i: 0..<num_iters>) {
3643 /// buffer[i] = red;
3645 /// #pragma omp master // in parallel region
3646 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3647 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3648 /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3649 /// #pragma omp barrier // in parallel region
3651 /// for (0..<num_iters>) {
3652 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
/// Emits the code for a loop directive with inscan reductions.
/// The visible statements: cast the iteration count from \p NumIteratorsGen
/// to CGF.SizeTy, collect the private/reduction-op/LHS/RHS/copy-array-element
/// expressions of every (asserted inscan) reduction clause, set
/// CGF.OMPFirstScanLoop to true for the input phase, define the `CodeGen`
/// lambda that emits the prefix reduction over the buffers, and finally set
/// CGF.OMPFirstScanLoop to false.
///
/// The prefix reduction is an outer loop over k with a doubling stride
/// (Pow2K starts at 1 and is shifted left each step) that runs until the
/// counter equals ceil(log2(num_iters)); its inner loop walks the element
/// index down from num_iters - 1 while it is >= Pow2K, combining element i
/// with element i - Pow2K through CGOpenMPRuntime::emitReduction.
/// For parallel directives the lambda is emitted inside a master region and
/// followed by a barrier call; a RegionCodeGenTy is also built from it.
/// NOTE(review): a number of lines are not visible in this listing, including
/// the invocations of \p FirstGen and \p SecondGen and the branch that uses
/// the RegionCodeGenTy; confirm against the full file.
3656 static void emitScanBasedDirective(
3657 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3658 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
,
3659 llvm::function_ref
<void(CodeGenFunction
&)> FirstGen
,
3660 llvm::function_ref
<void(CodeGenFunction
&)> SecondGen
) {
3661 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3662 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3663 SmallVector
<const Expr
*, 4> Privates
;
3664 SmallVector
<const Expr
*, 4> ReductionOps
;
3665 SmallVector
<const Expr
*, 4> LHSs
;
3666 SmallVector
<const Expr
*, 4> RHSs
;
3667 SmallVector
<const Expr
*, 4> CopyArrayElems
;
// Flatten the per-clause expression lists into parallel vectors.
3668 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3669 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3670 "Only inscan reductions are expected.");
3671 Privates
.append(C
->privates().begin(), C
->privates().end());
3672 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
3673 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
3674 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
3675 CopyArrayElems
.append(C
->copy_array_elems().begin(),
3676 C
->copy_array_elems().end());
3678 CodeGenFunction::ParentLoopDirectiveForScanRegion
ScanRegion(CGF
, S
);
3680 // Emit loop with input phase:
3682 // for (i: 0..<num_iters>) {
3686 CGF
.OMPFirstScanLoop
= true;
3687 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
3690 // #pragma omp barrier // in parallel region
3691 auto &&CodeGen
= [&S
, OMPScanNumIterations
, &LHSs
, &RHSs
, &CopyArrayElems
,
3693 &Privates
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
3695 // Emit prefix reduction:
3696 // #pragma omp master // in parallel region
3697 // for (int k = 0; k != ceil(log2(n)); ++k)
3698 llvm::BasicBlock
*InputBB
= CGF
.Builder
.GetInsertBlock();
3699 llvm::BasicBlock
*LoopBB
= CGF
.createBasicBlock("omp.outer.log.scan.body");
3700 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock("omp.outer.log.scan.exit");
// Number of outer steps: ceil(log2(num_iters)), computed in double precision
// and converted back to an unsigned integer of CGF.IntTy.
3702 CGF
.CGM
.getIntrinsic(llvm::Intrinsic::log2
, CGF
.DoubleTy
);
3704 CGF
.Builder
.CreateUIToFP(OMPScanNumIterations
, CGF
.DoubleTy
);
3705 llvm::Value
*LogVal
= CGF
.EmitNounwindRuntimeCall(F
, Arg
);
3706 F
= CGF
.CGM
.getIntrinsic(llvm::Intrinsic::ceil
, CGF
.DoubleTy
);
3707 LogVal
= CGF
.EmitNounwindRuntimeCall(F
, LogVal
);
3708 LogVal
= CGF
.Builder
.CreateFPToUI(LogVal
, CGF
.IntTy
);
// Index of the last buffer element (num_iters - 1).
3709 llvm::Value
*NMin1
= CGF
.Builder
.CreateNUWSub(
3710 OMPScanNumIterations
, llvm::ConstantInt::get(CGF
.SizeTy
, 1));
3711 auto DL
= ApplyDebugLocation::CreateDefaultArtificial(CGF
, S
.getBeginLoc());
3712 CGF
.EmitBlock(LoopBB
);
// Outer-loop PHIs: step counter k (starts at 0) and stride 2^k (starts at 1).
3713 auto *Counter
= CGF
.Builder
.CreatePHI(CGF
.IntTy
, 2);
3715 auto *Pow2K
= CGF
.Builder
.CreatePHI(CGF
.SizeTy
, 2);
3716 Counter
->addIncoming(llvm::ConstantInt::get(CGF
.IntTy
, 0), InputBB
);
3717 Pow2K
->addIncoming(llvm::ConstantInt::get(CGF
.SizeTy
, 1), InputBB
);
3718 // for (size i = n - 1; i >= 2 ^ k; --i)
3719 // tmp[i] op= tmp[i-pow2k];
3720 llvm::BasicBlock
*InnerLoopBB
=
3721 CGF
.createBasicBlock("omp.inner.log.scan.body");
3722 llvm::BasicBlock
*InnerExitBB
=
3723 CGF
.createBasicBlock("omp.inner.log.scan.exit");
// Skip the inner loop entirely when the last index is below the stride.
3724 llvm::Value
*CmpI
= CGF
.Builder
.CreateICmpUGE(NMin1
, Pow2K
);
3725 CGF
.Builder
.CreateCondBr(CmpI
, InnerLoopBB
, InnerExitBB
);
3726 CGF
.EmitBlock(InnerLoopBB
);
3727 auto *IVal
= CGF
.Builder
.CreatePHI(CGF
.SizeTy
, 2);
3728 IVal
->addIncoming(NMin1
, LoopBB
);
3730 CodeGenFunction::OMPPrivateScope
PrivScope(CGF
);
3731 auto *ILHS
= LHSs
.begin();
3732 auto *IRHS
= RHSs
.begin();
// Remap each reduction's LHS/RHS helper variables onto buffer elements.
3733 for (const Expr
*CopyArrayElem
: CopyArrayElems
) {
3734 const auto *LHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*ILHS
)->getDecl());
3735 const auto *RHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRHS
)->getDecl());
3736 Address LHSAddr
= Address::invalid();
3738 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3740 cast
<OpaqueValueExpr
>(
3741 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3743 LHSAddr
= CGF
.EmitLValue(CopyArrayElem
).getAddress(CGF
);
3745 PrivScope
.addPrivate(LHSVD
, LHSAddr
);
3746 Address RHSAddr
= Address::invalid();
// RHS reads the element Pow2K positions below the current index.
3748 llvm::Value
*OffsetIVal
= CGF
.Builder
.CreateNUWSub(IVal
, Pow2K
);
3749 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3751 cast
<OpaqueValueExpr
>(
3752 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3753 RValue::get(OffsetIVal
));
3754 RHSAddr
= CGF
.EmitLValue(CopyArrayElem
).getAddress(CGF
);
3756 PrivScope
.addPrivate(RHSVD
, RHSAddr
);
3760 PrivScope
.Privatize();
3761 CGF
.CGM
.getOpenMPRuntime().emitReduction(
3762 CGF
, S
.getEndLoc(), Privates
, LHSs
, RHSs
, ReductionOps
,
3763 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown
});
// Inner-loop latch: decrement the index and continue while it is >= Pow2K.
3765 llvm::Value
*NextIVal
=
3766 CGF
.Builder
.CreateNUWSub(IVal
, llvm::ConstantInt::get(CGF
.SizeTy
, 1));
3767 IVal
->addIncoming(NextIVal
, CGF
.Builder
.GetInsertBlock());
3768 CmpI
= CGF
.Builder
.CreateICmpUGE(NextIVal
, Pow2K
);
3769 CGF
.Builder
.CreateCondBr(CmpI
, InnerLoopBB
, InnerExitBB
);
3770 CGF
.EmitBlock(InnerExitBB
);
// Outer-loop latch: ++k, Pow2K <<= 1, loop while k != ceil(log2(num_iters)).
3772 CGF
.Builder
.CreateNUWAdd(Counter
, llvm::ConstantInt::get(CGF
.IntTy
, 1));
3773 Counter
->addIncoming(Next
, CGF
.Builder
.GetInsertBlock());
3775 llvm::Value
*NextPow2K
=
3776 CGF
.Builder
.CreateShl(Pow2K
, 1, "", /*HasNUW=*/true);
3777 Pow2K
->addIncoming(NextPow2K
, CGF
.Builder
.GetInsertBlock());
3778 llvm::Value
*Cmp
= CGF
.Builder
.CreateICmpNE(Next
, LogVal
);
3779 CGF
.Builder
.CreateCondBr(Cmp
, LoopBB
, ExitBB
);
3780 auto DL1
= ApplyDebugLocation::CreateDefaultArtificial(CGF
, S
.getEndLoc());
3781 CGF
.EmitBlock(ExitBB
);
// In a parallel region only the master thread runs the prefix reduction; the
// barrier call follows it.
3783 if (isOpenMPParallelDirective(S
.getDirectiveKind())) {
3784 CGF
.CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, CodeGen
, S
.getBeginLoc());
3785 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
3786 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
3787 /*ForceSimpleCall=*/true);
3789 RegionCodeGenTy
RCG(CodeGen
);
3793 CGF
.OMPFirstScanLoop
= false;
/// Emits a worksharing loop for \p S and returns the HasLastprivates flag.
/// When any reduction clause on \p S uses the inscan modifier, the loop is
/// emitted through emitScanBasedDirective with two generators (FirstGen and
/// SecondGen) that each run EmitOMPWorksharingLoop inside an
/// OMPCancelStackRAII region; FirstGen additionally emits a barrier call, and
/// SecondGen records the lastprivate result. The temp-buffer declarations and
/// the final copies are emitted here only when \p S is not a parallel
/// directive.
/// The last EmitOMPWorksharingLoop call presumably forms the non-inscan path
/// (the `else` line is not visible in this listing - confirm).
/// NOTE(review): the third parameter declaration is not visible; the body
/// reads `HasCancel`, presumably that parameter. Some call arguments are also
/// missing from this listing.
3797 static bool emitWorksharingDirective(CodeGenFunction
&CGF
,
3798 const OMPLoopDirective
&S
,
3800 bool HasLastprivates
;
3801 if (llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
3802 [](const OMPReductionClause
*C
) {
3803 return C
->getModifier() == OMPC_REDUCTION_inscan
;
// Generator for the iteration count used by the scan helpers.
3805 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
3806 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
3807 OMPLoopScope
LoopScope(CGF
, S
);
3808 return CGF
.EmitScalarExpr(S
.getNumIterations());
// First loop emission; its lastprivate result is deliberately discarded.
3810 const auto &&FirstGen
= [&S
, HasCancel
](CodeGenFunction
&CGF
) {
3811 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
3812 CGF
, S
.getDirectiveKind(), HasCancel
);
3813 (void)CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3815 emitDispatchForLoopBounds
);
3816 // Emit an implicit barrier at the end.
3817 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(CGF
, S
.getBeginLoc(),
// Second loop emission; this one determines HasLastprivates.
3820 const auto &&SecondGen
= [&S
, HasCancel
,
3821 &HasLastprivates
](CodeGenFunction
&CGF
) {
3822 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
3823 CGF
, S
.getDirectiveKind(), HasCancel
);
3824 HasLastprivates
= CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3826 emitDispatchForLoopBounds
);
3828 if (!isOpenMPParallelDirective(S
.getDirectiveKind()))
3829 emitScanBasedDirectiveDecls(CGF
, S
, NumIteratorsGen
);
3830 emitScanBasedDirective(CGF
, S
, NumIteratorsGen
, FirstGen
, SecondGen
);
3831 if (!isOpenMPParallelDirective(S
.getDirectiveKind()))
3832 emitScanBasedDirectiveFinals(CGF
, S
, NumIteratorsGen
);
3834 CodeGenFunction::OMPCancelStackRAII
CancelRegion(CGF
, S
.getDirectiveKind(),
3836 HasLastprivates
= CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3838 emitDispatchForLoopBounds
);
3840 return HasLastprivates
;
/// Inspects the clauses of the 'for' directive \p S - nowait clauses, and for
/// schedule clauses the first/second schedule modifiers and the schedule
/// kind - to decide whether the directive can be emitted through the
/// OpenMPIRBuilder.
/// NOTE(review): the return/continue statements of this function are not
/// visible in this listing, so the exact set of accepted clause combinations
/// cannot be stated from here; confirm against the full file.
3843 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective
&S
) {
3846 for (OMPClause
*C
: S
.clauses()) {
3847 if (isa
<OMPNowaitClause
>(C
))
3850 if (auto *SC
= dyn_cast
<OMPScheduleClause
>(C
)) {
// A schedule modifier other than "unknown" is treated specially (the action
// taken is not visible here).
3851 if (SC
->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown
)
3853 if (SC
->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown
)
3855 switch (SC
->getScheduleKind()) {
3856 case OMPC_SCHEDULE_auto
:
3857 case OMPC_SCHEDULE_dynamic
:
3858 case OMPC_SCHEDULE_runtime
:
3859 case OMPC_SCHEDULE_guided
:
3860 case OMPC_SCHEDULE_static
:
3862 case OMPC_SCHEDULE_unknown
:
3873 static llvm::omp::ScheduleKind
3874 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind
) {
3875 switch (ScheduleClauseKind
) {
3876 case OMPC_SCHEDULE_unknown
:
3877 return llvm::omp::OMP_SCHEDULE_Default
;
3878 case OMPC_SCHEDULE_auto
:
3879 return llvm::omp::OMP_SCHEDULE_Auto
;
3880 case OMPC_SCHEDULE_dynamic
:
3881 return llvm::omp::OMP_SCHEDULE_Dynamic
;
3882 case OMPC_SCHEDULE_guided
:
3883 return llvm::omp::OMP_SCHEDULE_Guided
;
3884 case OMPC_SCHEDULE_runtime
:
3885 return llvm::omp::OMP_SCHEDULE_Runtime
;
3886 case OMPC_SCHEDULE_static
:
3887 return llvm::omp::OMP_SCHEDULE_Static
;
3889 llvm_unreachable("Unhandled schedule kind");
/// Emits code for the 'for' directive \p S.
/// The OpenMPIRBuilder path is taken when LangOpts.OpenMPIRBuilder is set and
/// isSupportedByOpenMPIRBuilder(S) holds: the schedule kind and optional chunk
/// size are read from the schedule clause, the associated statement is emitted
/// as a canonical loop (nest depth 1) and OpenMPIRBuilder::applyWorkshareLoop
/// is applied to it, with the barrier requested unless a nowait clause is
/// present. The body also contains a call to emitWorksharingDirective whose
/// result is stored in HasLastprivates (presumably the non-IRBuilder path -
/// the `else` line is not visible in this listing).
/// The directive is emitted inline as OMPD_for; when the IRBuilder is not
/// used, a barrier call is emitted unless nowait is given without
/// lastprivates. Finally the outer lastprivate conditional update is checked.
/// NOTE(review): some lines (e.g. the assignment target of
/// convertClauseKindToSchedKind and trailing call arguments) are not visible.
3892 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective
&S
) {
3893 bool HasLastprivates
= false;
3894 bool UseOMPIRBuilder
=
3895 CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
);
3896 auto &&CodeGen
= [this, &S
, &HasLastprivates
,
3897 UseOMPIRBuilder
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3898 // Use the OpenMPIRBuilder if enabled.
3899 if (UseOMPIRBuilder
) {
3900 bool NeedsBarrier
= !S
.getSingleClause
<OMPNowaitClause
>();
3902 llvm::omp::ScheduleKind SchedKind
= llvm::omp::OMP_SCHEDULE_Default
;
3903 llvm::Value
*ChunkSize
= nullptr;
3904 if (auto *SchedClause
= S
.getSingleClause
<OMPScheduleClause
>()) {
3906 convertClauseKindToSchedKind(SchedClause
->getScheduleKind());
3907 if (const Expr
*ChunkSizeExpr
= SchedClause
->getChunkSize())
3908 ChunkSize
= EmitScalarExpr(ChunkSizeExpr
);
3911 // Emit the associated statement and get its loop representation.
3912 const Stmt
*Inner
= S
.getRawStmt();
3913 llvm::CanonicalLoopInfo
*CLI
=
3914 EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
3916 llvm::OpenMPIRBuilder
&OMPBuilder
=
3917 CGM
.getOpenMPRuntime().getOMPBuilder();
3918 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
3919 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
// Modifier and ordered flags are passed as false on this path.
3920 OMPBuilder
.applyWorkshareLoop(
3921 Builder
.getCurrentDebugLocation(), CLI
, AllocaIP
, NeedsBarrier
,
3922 SchedKind
, ChunkSize
, /*HasSimdModifier=*/false,
3923 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
3924 /*HasOrderedClause=*/false);
3928 HasLastprivates
= emitWorksharingDirective(CGF
, S
, S
.hasCancel());
3932 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
3933 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3934 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for
, CodeGen
,
3938 if (!UseOMPIRBuilder
) {
3939 // Emit an implicit barrier at the end.
3940 if (!S
.getSingleClause
<OMPNowaitClause
>() || HasLastprivates
)
3941 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_for
);
3943 // Check for outer lastprivate conditional update.
3944 checkForLastprivateConditionalUpdate(*this, S
);
/// Emits code for the 'for simd' directive \p S.
/// The loop is generated by emitWorksharingDirective (with HasCancel=false)
/// from within an inlined OMPD_simd region. Afterwards a barrier call
/// (directive kind OMPD_for) is emitted unless a nowait clause is present and
/// there are no lastprivates, and the outer lastprivate conditional update is
/// checked.
/// NOTE(review): a few lines (scope braces and the declaration receiving the
/// LastprivateConditionalRAII::disable result) are not visible in this
/// listing; confirm against the full file.
3947 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective
&S
) {
3948 bool HasLastprivates
= false;
3949 auto &&CodeGen
= [&S
, &HasLastprivates
](CodeGenFunction
&CGF
,
3950 PrePostActionTy
&) {
3951 HasLastprivates
= emitWorksharingDirective(CGF
, S
, /*HasCancel=*/false);
3955 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
3956 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3957 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
3960 // Emit an implicit barrier at the end.
3961 if (!S
.getSingleClause
<OMPNowaitClause
>() || HasLastprivates
)
3962 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_for
);
3963 // Check for outer lastprivate conditional update.
3964 checkForLastprivateConditionalUpdate(*this, S
);
/// Creates a memory temporary of type \p Ty (named by `Name`) in \p CGF and
/// wraps its address in an LValue. The visible store writes \p Init through
/// that LValue as an initialization; \p Init defaults to nullptr.
/// NOTE(review): the `Name` parameter declaration, any guard around the store
/// for a null \p Init, and the return statement are not visible in this
/// listing; presumably the LValue is returned - confirm.
3967 static LValue
createSectionLVal(CodeGenFunction
&CGF
, QualType Ty
,
3969 llvm::Value
*Init
= nullptr) {
3970 LValue LVal
= CGF
.MakeAddrLValue(CGF
.CreateMemTemp(Ty
, Name
), Ty
);
3972 CGF
.EmitStoreThroughLValue(RValue::get(Init
), LVal
, /*isInit*/ true);
/// Emits the sections of directive \p S as a statically scheduled loop over
/// section indices.
/// Helper variables (lb, st, il, iv and the upper bound) are 32-bit signed
/// temporaries created with createSectionLVal. The loop condition `iv <= ub`
/// and the pre-increment of `iv` are built as AST nodes over opaque value
/// expressions. The loop body switches on the loaded iteration variable: each
/// child of the captured CompoundStmt becomes one case, and there is also code
/// emitting the whole captured statement as case 0. When the captured
/// statement is not a CompoundStmt (CS == nullptr) the global upper bound is 0
/// and the switch is created with room for one case.
/// Around the loop the firstprivate, private, lastprivate and reduction
/// clauses are handled, the runtime is initialised with emitForStaticInit for
/// a static schedule, the upper bound is clamped to the global upper bound,
/// and emitForStaticFinish is registered through OMPCancelStack.emitExit.
/// After the inlined OMPD_sections region, a barrier call is emitted when
/// lastprivates exist and a nowait clause is present.
/// NOTE(review): several lines (closing braces, the declaration of `UB`,
/// branch keywords and some call arguments) are not visible in this listing;
/// confirm control-flow details against the full file.
3976 void CodeGenFunction::EmitSections(const OMPExecutableDirective
&S
) {
3977 const Stmt
*CapturedStmt
= S
.getInnermostCapturedStmt()->getCapturedStmt();
3978 const auto *CS
= dyn_cast
<CompoundStmt
>(CapturedStmt
);
3979 bool HasLastprivates
= false;
3980 auto &&CodeGen
= [&S
, CapturedStmt
, CS
,
3981 &HasLastprivates
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3982 const ASTContext
&C
= CGF
.getContext();
3983 QualType KmpInt32Ty
=
3984 C
.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3985 // Emit helper vars inits.
3986 LValue LB
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.lb.",
3987 CGF
.Builder
.getInt32(0));
// Global upper bound: index of the last section, or 0 without a CompoundStmt.
3988 llvm::ConstantInt
*GlobalUBVal
= CS
!= nullptr
3989 ? CGF
.Builder
.getInt32(CS
->size() - 1)
3990 : CGF
.Builder
.getInt32(0);
3992 createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.ub.", GlobalUBVal
);
3993 LValue ST
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.st.",
3994 CGF
.Builder
.getInt32(1));
3995 LValue IL
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.il.",
3996 CGF
.Builder
.getInt32(0));
3998 LValue IV
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.iv.");
// Opaque expressions standing for the iv and ub temporaries, so that the loop
// condition and increment can be expressed as AST nodes.
3999 OpaqueValueExpr
IVRefExpr(S
.getBeginLoc(), KmpInt32Ty
, VK_LValue
);
4000 CodeGenFunction::OpaqueValueMapping
OpaqueIV(CGF
, &IVRefExpr
, IV
);
4001 OpaqueValueExpr
UBRefExpr(S
.getBeginLoc(), KmpInt32Ty
, VK_LValue
);
4002 CodeGenFunction::OpaqueValueMapping
OpaqueUB(CGF
, &UBRefExpr
, UB
);
4003 // Generate condition for loop.
4004 BinaryOperator
*Cond
= BinaryOperator::Create(
4005 C
, &IVRefExpr
, &UBRefExpr
, BO_LE
, C
.BoolTy
, VK_PRValue
, OK_Ordinary
,
4006 S
.getBeginLoc(), FPOptionsOverride());
4007 // Increment for loop counter.
4008 UnaryOperator
*Inc
= UnaryOperator::Create(
4009 C
, &IVRefExpr
, UO_PreInc
, KmpInt32Ty
, VK_PRValue
, OK_Ordinary
,
4010 S
.getBeginLoc(), true, FPOptionsOverride());
4011 auto &&BodyGen
= [CapturedStmt
, CS
, &S
, &IV
](CodeGenFunction
&CGF
) {
4012 // Iterate through all sections and emit a switch construct:
4015 // <SectionStmt[0]>;
4018 // case <NumSection> - 1:
4019 // <SectionStmt[<NumSection> - 1]>;
4022 // .omp.sections.exit:
4023 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock(".omp.sections.exit");
4024 llvm::SwitchInst
*SwitchStmt
=
4025 CGF
.Builder
.CreateSwitch(CGF
.EmitLoadOfScalar(IV
, S
.getBeginLoc()),
4026 ExitBB
, CS
== nullptr ? 1 : CS
->size());
4028 unsigned CaseNumber
= 0;
// One switch case per child statement of the CompoundStmt.
4029 for (const Stmt
*SubStmt
: CS
->children()) {
4030 auto CaseBB
= CGF
.createBasicBlock(".omp.sections.case");
4031 CGF
.EmitBlock(CaseBB
);
4032 SwitchStmt
->addCase(CGF
.Builder
.getInt32(CaseNumber
), CaseBB
);
4033 CGF
.EmitStmt(SubStmt
);
4034 CGF
.EmitBranch(ExitBB
);
// Single-case form: the whole captured statement is emitted as case 0.
4038 llvm::BasicBlock
*CaseBB
= CGF
.createBasicBlock(".omp.sections.case");
4039 CGF
.EmitBlock(CaseBB
);
4040 SwitchStmt
->addCase(CGF
.Builder
.getInt32(0), CaseBB
);
4041 CGF
.EmitStmt(CapturedStmt
);
4042 CGF
.EmitBranch(ExitBB
);
4044 CGF
.EmitBlock(ExitBB
, /*IsFinished=*/true);
4047 CodeGenFunction::OMPPrivateScope
LoopScope(CGF
);
4048 if (CGF
.EmitOMPFirstprivateClause(S
, LoopScope
)) {
4049 // Emit implicit barrier to synchronize threads and avoid data races on
4050 // initialization of firstprivate variables and post-update of lastprivate
4052 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
4053 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
4054 /*ForceSimpleCall=*/true);
4056 CGF
.EmitOMPPrivateClause(S
, LoopScope
);
4057 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(CGF
, S
, IV
);
4058 HasLastprivates
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
4059 CGF
.EmitOMPReductionClauseInit(S
, LoopScope
);
4060 (void)LoopScope
.Privatize();
4061 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
4062 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
4064 // Emit static non-chunked loop.
4065 OpenMPScheduleTy ScheduleKind
;
4066 ScheduleKind
.Schedule
= OMPC_SCHEDULE_static
;
4067 CGOpenMPRuntime::StaticRTInput
StaticInit(
4068 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL
.getAddress(CGF
),
4069 LB
.getAddress(CGF
), UB
.getAddress(CGF
), ST
.getAddress(CGF
));
4070 CGF
.CGM
.getOpenMPRuntime().emitForStaticInit(
4071 CGF
, S
.getBeginLoc(), S
.getDirectiveKind(), ScheduleKind
, StaticInit
);
4072 // UB = min(UB, GlobalUB);
4073 llvm::Value
*UBVal
= CGF
.EmitLoadOfScalar(UB
, S
.getBeginLoc());
4074 llvm::Value
*MinUBGlobalUB
= CGF
.Builder
.CreateSelect(
4075 CGF
.Builder
.CreateICmpSLT(UBVal
, GlobalUBVal
), UBVal
, GlobalUBVal
);
4076 CGF
.EmitStoreOfScalar(MinUBGlobalUB
, UB
);
// Start the iteration variable at the lower bound.
4078 CGF
.EmitStoreOfScalar(CGF
.EmitLoadOfScalar(LB
, S
.getBeginLoc()), IV
);
4079 // while (idx <= UB) { BODY; ++idx; }
4080 CGF
.EmitOMPInnerLoop(S
, /*RequiresCleanup=*/false, Cond
, Inc
, BodyGen
,
4081 [](CodeGenFunction
&) {});
4082 // Tell the runtime we are done.
4083 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
) {
4084 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
4085 S
.getDirectiveKind());
4087 CGF
.OMPCancelStack
.emitExit(CGF
, S
.getDirectiveKind(), CodeGen
);
4088 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4089 // Emit post-update of the reduction variables if IsLastIter != 0.
4090 emitPostUpdateForReductionClause(CGF
, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
4091 return CGF
.Builder
.CreateIsNotNull(
4092 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
4095 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4096 if (HasLastprivates
)
4097 CGF
.EmitOMPLastprivateClauseFinal(
4098 S
, /*NoFinals=*/false,
4099 CGF
.Builder
.CreateIsNotNull(
4100 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc())));
// Cancellation is taken from the concrete directive type; other directive
// kinds leave HasCancel false.
4103 bool HasCancel
= false;
4104 if (auto *OSD
= dyn_cast
<OMPSectionsDirective
>(&S
))
4105 HasCancel
= OSD
->hasCancel();
4106 else if (auto *OPSD
= dyn_cast
<OMPParallelSectionsDirective
>(&S
))
4107 HasCancel
= OPSD
->hasCancel();
4108 OMPCancelStackRAII
CancelRegion(*this, S
.getDirectiveKind(), HasCancel
);
4109 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections
, CodeGen
,
4111 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4112 // clause. Otherwise the barrier will be generated by the codegen for the
4114 if (HasLastprivates
&& S
.getSingleClause
<OMPNowaitClause
>()) {
4115 // Emit implicit barrier to synchronize threads and avoid data races on
4116 // initialization of firstprivate variables.
4117 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(),
/// Emits code for the 'sections' directive \p S.
/// With LangOpts.OpenMPIRBuilder set, body-generation callbacks are collected
/// in SectionCBVector - one per child of the captured CompoundStmt, and there
/// is also code adding a single callback for the whole captured statement -
/// and passed to OpenMPIRBuilder::createSections together with a
/// privatization callback, a finalization callback, the directive's cancel
/// flag and whether a nowait clause is present. The builder's insert point is
/// then restored from the result.
/// The remaining visible statements call
/// LastprivateConditionalRAII::disable, open an OMPLexicalScope, emit a
/// barrier call unless a nowait clause is present, and check for an outer
/// lastprivate conditional update.
/// NOTE(review): several lines are not visible in this listing (closing
/// braces, the guard selecting between the per-child and single-callback
/// forms, the body of PrivCB, the early return of the IRBuilder path and the
/// call that emits the sections on the regular path); confirm against the
/// full file.
4122 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective
&S
) {
4123 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4124 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4125 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4126 using BodyGenCallbackTy
= llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy
;
// Finalization callback handed to the builder.
4128 auto FiniCB
= [this](InsertPointTy IP
) {
4129 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4132 const CapturedStmt
*ICS
= S
.getInnermostCapturedStmt();
4133 const Stmt
*CapturedStmt
= S
.getInnermostCapturedStmt()->getCapturedStmt();
4134 const auto *CS
= dyn_cast
<CompoundStmt
>(CapturedStmt
);
4135 llvm::SmallVector
<BodyGenCallbackTy
, 4> SectionCBVector
;
// One body callback per child statement of the CompoundStmt.
4137 for (const Stmt
*SubStmt
: CS
->children()) {
4138 auto SectionCB
= [this, SubStmt
](InsertPointTy AllocaIP
,
4139 InsertPointTy CodeGenIP
) {
4140 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4141 *this, SubStmt
, AllocaIP
, CodeGenIP
, "section");
4143 SectionCBVector
.push_back(SectionCB
);
// Single callback covering the whole captured statement.
4146 auto SectionCB
= [this, CapturedStmt
](InsertPointTy AllocaIP
,
4147 InsertPointTy CodeGenIP
) {
4148 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4149 *this, CapturedStmt
, AllocaIP
, CodeGenIP
, "section");
4151 SectionCBVector
.push_back(SectionCB
);
4154 // Privatization callback that performs appropriate action for
4155 // shared/private/firstprivate/lastprivate/copyin/... variables.
4157 // TODO: This defaults to shared right now.
4158 auto PrivCB
= [](InsertPointTy AllocaIP
, InsertPointTy CodeGenIP
,
4159 llvm::Value
&, llvm::Value
&Val
, llvm::Value
*&ReplVal
) {
4160 // The next line is appropriate only for variables (Val) with the
4161 // data-sharing attribute "shared".
4167 CGCapturedStmtInfo
CGSI(*ICS
, CR_OpenMP
);
4168 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(*this, &CGSI
);
4169 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
4170 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
4171 Builder
.restoreIP(OMPBuilder
.createSections(
4172 Builder
, AllocaIP
, SectionCBVector
, PrivCB
, FiniCB
, S
.hasCancel(),
4173 S
.getSingleClause
<OMPNowaitClause
>()));
4178 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4179 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4182 // Emit an implicit barrier at the end.
4183 if (!S
.getSingleClause
<OMPNowaitClause
>()) {
4184 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(),
4187 // Check for outer lastprivate conditional update.
4188 checkForLastprivateConditionalUpdate(*this, S
);
4191 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective
&S
) {
4192 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4193 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4194 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4196 const Stmt
*SectionRegionBodyStmt
= S
.getAssociatedStmt();
4197 auto FiniCB
= [this](InsertPointTy IP
) {
4198 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4201 auto BodyGenCB
= [SectionRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4202 InsertPointTy CodeGenIP
) {
4203 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4204 *this, SectionRegionBodyStmt
, AllocaIP
, CodeGenIP
, "section");
4207 LexicalScope
Scope(*this, S
.getSourceRange());
4209 Builder
.restoreIP(OMPBuilder
.createSection(Builder
, BodyGenCB
, FiniCB
));
4213 LexicalScope
Scope(*this, S
.getSourceRange());
4215 EmitStmt(S
.getAssociatedStmt());
4218 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective
&S
) {
4219 llvm::SmallVector
<const Expr
*, 8> CopyprivateVars
;
4220 llvm::SmallVector
<const Expr
*, 8> DestExprs
;
4221 llvm::SmallVector
<const Expr
*, 8> SrcExprs
;
4222 llvm::SmallVector
<const Expr
*, 8> AssignmentOps
;
4223 // Check if there are any 'copyprivate' clauses associated with this
4224 // 'single' construct.
4225 // Build a list of copyprivate variables along with helper expressions
4226 // (<source>, <destination>, <destination>=<source> expressions)
4227 for (const auto *C
: S
.getClausesOfKind
<OMPCopyprivateClause
>()) {
4228 CopyprivateVars
.append(C
->varlists().begin(), C
->varlists().end());
4229 DestExprs
.append(C
->destination_exprs().begin(),
4230 C
->destination_exprs().end());
4231 SrcExprs
.append(C
->source_exprs().begin(), C
->source_exprs().end());
4232 AssignmentOps
.append(C
->assignment_ops().begin(),
4233 C
->assignment_ops().end());
4235 // Emit code for 'single' region along with 'copyprivate' clauses
4236 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4238 OMPPrivateScope
SingleScope(CGF
);
4239 (void)CGF
.EmitOMPFirstprivateClause(S
, SingleScope
);
4240 CGF
.EmitOMPPrivateClause(S
, SingleScope
);
4241 (void)SingleScope
.Privatize();
4242 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
4246 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4247 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4248 CGM
.getOpenMPRuntime().emitSingleRegion(*this, CodeGen
, S
.getBeginLoc(),
4249 CopyprivateVars
, DestExprs
,
4250 SrcExprs
, AssignmentOps
);
4252 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4253 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4254 if (!S
.getSingleClause
<OMPNowaitClause
>() && CopyprivateVars
.empty()) {
4255 CGM
.getOpenMPRuntime().emitBarrierCall(
4256 *this, S
.getBeginLoc(),
4257 S
.getSingleClause
<OMPNowaitClause
>() ? OMPD_unknown
: OMPD_single
);
4259 // Check for outer lastprivate conditional update.
4260 checkForLastprivateConditionalUpdate(*this, S
);
4263 static void emitMaster(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
4264 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4266 CGF
.EmitStmt(S
.getRawStmt());
4268 CGF
.CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, CodeGen
, S
.getBeginLoc());
4271 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective
&S
) {
4272 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4273 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4274 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4276 const Stmt
*MasterRegionBodyStmt
= S
.getAssociatedStmt();
4278 auto FiniCB
= [this](InsertPointTy IP
) {
4279 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4282 auto BodyGenCB
= [MasterRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4283 InsertPointTy CodeGenIP
) {
4284 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4285 *this, MasterRegionBodyStmt
, AllocaIP
, CodeGenIP
, "master");
4288 LexicalScope
Scope(*this, S
.getSourceRange());
4290 Builder
.restoreIP(OMPBuilder
.createMaster(Builder
, BodyGenCB
, FiniCB
));
4294 LexicalScope
Scope(*this, S
.getSourceRange());
4296 emitMaster(*this, S
);
4299 static void emitMasked(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
4300 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4302 CGF
.EmitStmt(S
.getRawStmt());
4304 Expr
*Filter
= nullptr;
4305 if (const auto *FilterClause
= S
.getSingleClause
<OMPFilterClause
>())
4306 Filter
= FilterClause
->getThreadID();
4307 CGF
.CGM
.getOpenMPRuntime().emitMaskedRegion(CGF
, CodeGen
, S
.getBeginLoc(),
4311 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective
&S
) {
4312 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4313 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4314 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4316 const Stmt
*MaskedRegionBodyStmt
= S
.getAssociatedStmt();
4317 const Expr
*Filter
= nullptr;
4318 if (const auto *FilterClause
= S
.getSingleClause
<OMPFilterClause
>())
4319 Filter
= FilterClause
->getThreadID();
4320 llvm::Value
*FilterVal
= Filter
4321 ? EmitScalarExpr(Filter
, CGM
.Int32Ty
)
4322 : llvm::ConstantInt::get(CGM
.Int32Ty
, /*V=*/0);
4324 auto FiniCB
= [this](InsertPointTy IP
) {
4325 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4328 auto BodyGenCB
= [MaskedRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4329 InsertPointTy CodeGenIP
) {
4330 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4331 *this, MaskedRegionBodyStmt
, AllocaIP
, CodeGenIP
, "masked");
4334 LexicalScope
Scope(*this, S
.getSourceRange());
4337 OMPBuilder
.createMasked(Builder
, BodyGenCB
, FiniCB
, FilterVal
));
4341 LexicalScope
Scope(*this, S
.getSourceRange());
4343 emitMasked(*this, S
);
4346 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective
&S
) {
4347 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4348 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4349 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4351 const Stmt
*CriticalRegionBodyStmt
= S
.getAssociatedStmt();
4352 const Expr
*Hint
= nullptr;
4353 if (const auto *HintClause
= S
.getSingleClause
<OMPHintClause
>())
4354 Hint
= HintClause
->getHint();
4356 // TODO: This is slightly different from what's currently being done in
4357 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4358 // about typing is final.
4359 llvm::Value
*HintInst
= nullptr;
4362 Builder
.CreateIntCast(EmitScalarExpr(Hint
), CGM
.Int32Ty
, false);
4364 auto FiniCB
= [this](InsertPointTy IP
) {
4365 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4368 auto BodyGenCB
= [CriticalRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4369 InsertPointTy CodeGenIP
) {
4370 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4371 *this, CriticalRegionBodyStmt
, AllocaIP
, CodeGenIP
, "critical");
4374 LexicalScope
Scope(*this, S
.getSourceRange());
4376 Builder
.restoreIP(OMPBuilder
.createCritical(
4377 Builder
, BodyGenCB
, FiniCB
, S
.getDirectiveName().getAsString(),
4383 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4385 CGF
.EmitStmt(S
.getAssociatedStmt());
4387 const Expr
*Hint
= nullptr;
4388 if (const auto *HintClause
= S
.getSingleClause
<OMPHintClause
>())
4389 Hint
= HintClause
->getHint();
4390 LexicalScope
Scope(*this, S
.getSourceRange());
4392 CGM
.getOpenMPRuntime().emitCriticalRegion(*this,
4393 S
.getDirectiveName().getAsString(),
4394 CodeGen
, S
.getBeginLoc(), Hint
);
4397 void CodeGenFunction::EmitOMPParallelForDirective(
4398 const OMPParallelForDirective
&S
) {
4399 // Emit directive as a combined directive that consists of two implicit
4400 // directives: 'parallel' with 'for' directive.
4401 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4403 emitOMPCopyinClause(CGF
, S
);
4404 (void)emitWorksharingDirective(CGF
, S
, S
.hasCancel());
4407 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
4408 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
4409 CGCapturedStmtInfo
CGSI(CR_OpenMP
);
4410 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGSI
);
4411 OMPLoopScope
LoopScope(CGF
, S
);
4412 return CGF
.EmitScalarExpr(S
.getNumIterations());
4414 bool IsInscan
= llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
4415 [](const OMPReductionClause
*C
) {
4416 return C
->getModifier() == OMPC_REDUCTION_inscan
;
4419 emitScanBasedDirectiveDecls(*this, S
, NumIteratorsGen
);
4421 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4422 emitCommonOMPParallelDirective(*this, S
, OMPD_for
, CodeGen
,
4423 emitEmptyBoundParameters
);
4425 emitScanBasedDirectiveFinals(*this, S
, NumIteratorsGen
);
4427 // Check for outer lastprivate conditional update.
4428 checkForLastprivateConditionalUpdate(*this, S
);
4431 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4432 const OMPParallelForSimdDirective
&S
) {
4433 // Emit directive as a combined directive that consists of two implicit
4434 // directives: 'parallel' with 'for' directive.
4435 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4437 emitOMPCopyinClause(CGF
, S
);
4438 (void)emitWorksharingDirective(CGF
, S
, /*HasCancel=*/false);
4441 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
4442 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
4443 CGCapturedStmtInfo
CGSI(CR_OpenMP
);
4444 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGSI
);
4445 OMPLoopScope
LoopScope(CGF
, S
);
4446 return CGF
.EmitScalarExpr(S
.getNumIterations());
4448 bool IsInscan
= llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
4449 [](const OMPReductionClause
*C
) {
4450 return C
->getModifier() == OMPC_REDUCTION_inscan
;
4453 emitScanBasedDirectiveDecls(*this, S
, NumIteratorsGen
);
4455 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4456 emitCommonOMPParallelDirective(*this, S
, OMPD_for_simd
, CodeGen
,
4457 emitEmptyBoundParameters
);
4459 emitScanBasedDirectiveFinals(*this, S
, NumIteratorsGen
);
4461 // Check for outer lastprivate conditional update.
4462 checkForLastprivateConditionalUpdate(*this, S
);
4465 void CodeGenFunction::EmitOMPParallelMasterDirective(
4466 const OMPParallelMasterDirective
&S
) {
4467 // Emit directive as a combined directive that consists of two implicit
4468 // directives: 'parallel' with 'master' directive.
4469 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4471 OMPPrivateScope
PrivateScope(CGF
);
4472 emitOMPCopyinClause(CGF
, S
);
4473 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
4474 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
4475 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
4476 (void)PrivateScope
.Privatize();
4478 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4482 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4483 emitCommonOMPParallelDirective(*this, S
, OMPD_master
, CodeGen
,
4484 emitEmptyBoundParameters
);
4485 emitPostUpdateForReductionClause(*this, S
,
4486 [](CodeGenFunction
&) { return nullptr; });
4488 // Check for outer lastprivate conditional update.
4489 checkForLastprivateConditionalUpdate(*this, S
);
4492 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4493 const OMPParallelSectionsDirective
&S
) {
4494 // Emit directive as a combined directive that consists of two implicit
4495 // directives: 'parallel' with 'sections' directive.
4496 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4498 emitOMPCopyinClause(CGF
, S
);
4499 CGF
.EmitSections(S
);
4503 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4504 emitCommonOMPParallelDirective(*this, S
, OMPD_sections
, CodeGen
,
4505 emitEmptyBoundParameters
);
4507 // Check for outer lastprivate conditional update.
4508 checkForLastprivateConditionalUpdate(*this, S
);
4512 /// Get the list of variables declared in the context of the untied tasks.
4513 class CheckVarsEscapingUntiedTaskDeclContext final
4514 : public ConstStmtVisitor
<CheckVarsEscapingUntiedTaskDeclContext
> {
4515 llvm::SmallVector
<const VarDecl
*, 4> PrivateDecls
;
4518 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4519 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4520 void VisitDeclStmt(const DeclStmt
*S
) {
4523 // Need to privatize only local vars, static locals can be processed as is.
4524 for (const Decl
*D
: S
->decls()) {
4525 if (const auto *VD
= dyn_cast_or_null
<VarDecl
>(D
))
4526 if (VD
->hasLocalStorage())
4527 PrivateDecls
.push_back(VD
);
4530 void VisitOMPExecutableDirective(const OMPExecutableDirective
*) {}
4531 void VisitCapturedStmt(const CapturedStmt
*) {}
4532 void VisitLambdaExpr(const LambdaExpr
*) {}
4533 void VisitBlockExpr(const BlockExpr
*) {}
4534 void VisitStmt(const Stmt
*S
) {
4537 for (const Stmt
*Child
: S
->children())
4542 /// Swaps list of vars with the provided one.
4543 ArrayRef
<const VarDecl
*> getPrivateDecls() const { return PrivateDecls
; }
4545 } // anonymous namespace
4547 static void buildDependences(const OMPExecutableDirective
&S
,
4548 OMPTaskDataTy
&Data
) {
4550 // First look for 'omp_all_memory' and add this first.
4551 bool OmpAllMemory
= false;
4553 S
.getClausesOfKind
<OMPDependClause
>(), [](const OMPDependClause
*C
) {
4554 return C
->getDependencyKind() == OMPC_DEPEND_outallmemory
||
4555 C
->getDependencyKind() == OMPC_DEPEND_inoutallmemory
;
4557 OmpAllMemory
= true;
4558 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4559 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4561 OMPTaskDataTy::DependData
&DD
=
4562 Data
.Dependences
.emplace_back(OMPC_DEPEND_outallmemory
,
4563 /*IteratorExpr=*/nullptr);
4564 // Add a nullptr Expr to simplify the codegen in emitDependData.
4565 DD
.DepExprs
.push_back(nullptr);
4567 // Add remaining dependences skipping any 'out' or 'inout' if they are
4568 // overridden by 'omp_all_memory'.
4569 for (const auto *C
: S
.getClausesOfKind
<OMPDependClause
>()) {
4570 OpenMPDependClauseKind Kind
= C
->getDependencyKind();
4571 if (Kind
== OMPC_DEPEND_outallmemory
|| Kind
== OMPC_DEPEND_inoutallmemory
)
4573 if (OmpAllMemory
&& (Kind
== OMPC_DEPEND_out
|| Kind
== OMPC_DEPEND_inout
))
4575 OMPTaskDataTy::DependData
&DD
=
4576 Data
.Dependences
.emplace_back(C
->getDependencyKind(), C
->getModifier());
4577 DD
.DepExprs
.append(C
->varlist_begin(), C
->varlist_end());
4581 void CodeGenFunction::EmitOMPTaskBasedDirective(
4582 const OMPExecutableDirective
&S
, const OpenMPDirectiveKind CapturedRegion
,
4583 const RegionCodeGenTy
&BodyGen
, const TaskGenTy
&TaskGen
,
4584 OMPTaskDataTy
&Data
) {
4585 // Emit outlined function for task construct.
4586 const CapturedStmt
*CS
= S
.getCapturedStmt(CapturedRegion
);
4587 auto I
= CS
->getCapturedDecl()->param_begin();
4588 auto PartId
= std::next(I
);
4589 auto TaskT
= std::next(I
, 4);
4590 // Check if the task is final
4591 if (const auto *Clause
= S
.getSingleClause
<OMPFinalClause
>()) {
4592 // If the condition constant folds and can be elided, try to avoid emitting
4593 // the condition and the dead arm of the if/else.
4594 const Expr
*Cond
= Clause
->getCondition();
4596 if (ConstantFoldsToSimpleInteger(Cond
, CondConstant
))
4597 Data
.Final
.setInt(CondConstant
);
4599 Data
.Final
.setPointer(EvaluateExprAsBool(Cond
));
4601 // By default the task is not final.
4602 Data
.Final
.setInt(/*IntVal=*/false);
4604 // Check if the task has 'priority' clause.
4605 if (const auto *Clause
= S
.getSingleClause
<OMPPriorityClause
>()) {
4606 const Expr
*Prio
= Clause
->getPriority();
4607 Data
.Priority
.setInt(/*IntVal=*/true);
4608 Data
.Priority
.setPointer(EmitScalarConversion(
4609 EmitScalarExpr(Prio
), Prio
->getType(),
4610 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4611 Prio
->getExprLoc()));
4613 // The first function argument for tasks is a thread id, the second one is a
4614 // part id (0 for tied tasks, >=0 for untied task).
4615 llvm::DenseSet
<const VarDecl
*> EmittedAsPrivate
;
4616 // Get list of private variables.
4617 for (const auto *C
: S
.getClausesOfKind
<OMPPrivateClause
>()) {
4618 auto IRef
= C
->varlist_begin();
4619 for (const Expr
*IInit
: C
->private_copies()) {
4620 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4621 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4622 Data
.PrivateVars
.push_back(*IRef
);
4623 Data
.PrivateCopies
.push_back(IInit
);
4628 EmittedAsPrivate
.clear();
4629 // Get list of firstprivate variables.
4630 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
4631 auto IRef
= C
->varlist_begin();
4632 auto IElemInitRef
= C
->inits().begin();
4633 for (const Expr
*IInit
: C
->private_copies()) {
4634 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4635 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4636 Data
.FirstprivateVars
.push_back(*IRef
);
4637 Data
.FirstprivateCopies
.push_back(IInit
);
4638 Data
.FirstprivateInits
.push_back(*IElemInitRef
);
4644 // Get list of lastprivate variables (for taskloops).
4645 llvm::MapVector
<const VarDecl
*, const DeclRefExpr
*> LastprivateDstsOrigs
;
4646 for (const auto *C
: S
.getClausesOfKind
<OMPLastprivateClause
>()) {
4647 auto IRef
= C
->varlist_begin();
4648 auto ID
= C
->destination_exprs().begin();
4649 for (const Expr
*IInit
: C
->private_copies()) {
4650 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4651 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4652 Data
.LastprivateVars
.push_back(*IRef
);
4653 Data
.LastprivateCopies
.push_back(IInit
);
4655 LastprivateDstsOrigs
.insert(
4656 std::make_pair(cast
<VarDecl
>(cast
<DeclRefExpr
>(*ID
)->getDecl()),
4657 cast
<DeclRefExpr
>(*IRef
)));
4662 SmallVector
<const Expr
*, 4> LHSs
;
4663 SmallVector
<const Expr
*, 4> RHSs
;
4664 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
4665 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
4666 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
4667 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
4668 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
4669 C
->reduction_ops().end());
4670 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
4671 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
4673 Data
.Reductions
= CGM
.getOpenMPRuntime().emitTaskReductionInit(
4674 *this, S
.getBeginLoc(), LHSs
, RHSs
, Data
);
4675 // Build list of dependences.
4676 buildDependences(S
, Data
);
4677 // Get list of local vars for untied tasks.
4679 CheckVarsEscapingUntiedTaskDeclContext Checker
;
4680 Checker
.Visit(S
.getInnermostCapturedStmt()->getCapturedStmt());
4681 Data
.PrivateLocals
.append(Checker
.getPrivateDecls().begin(),
4682 Checker
.getPrivateDecls().end());
4684 auto &&CodeGen
= [&Data
, &S
, CS
, &BodyGen
, &LastprivateDstsOrigs
,
4685 CapturedRegion
](CodeGenFunction
&CGF
,
4686 PrePostActionTy
&Action
) {
4687 llvm::MapVector
<CanonicalDeclPtr
<const VarDecl
>,
4688 std::pair
<Address
, Address
>>
4690 // Set proper addresses for generated private copies.
4691 OMPPrivateScope
Scope(CGF
);
4692 // Generate debug info for variables present in shared clause.
4693 if (auto *DI
= CGF
.getDebugInfo()) {
4694 llvm::SmallDenseMap
<const VarDecl
*, FieldDecl
*> CaptureFields
=
4695 CGF
.CapturedStmtInfo
->getCaptureFields();
4696 llvm::Value
*ContextValue
= CGF
.CapturedStmtInfo
->getContextValue();
4697 if (CaptureFields
.size() && ContextValue
) {
4698 unsigned CharWidth
= CGF
.getContext().getCharWidth();
4699 // The shared variables are packed together as members of structure.
4700 // So the address of each shared variable can be computed by adding
4701 // offset of it (within record) to the base address of record. For each
4702 // shared variable, debug intrinsic llvm.dbg.declare is generated with
4703 // appropriate expressions (DIExpression).
4705 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4706 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4708 // metadata !DIExpression(DW_OP_deref))
4709 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4711 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4712 for (auto It
= CaptureFields
.begin(); It
!= CaptureFields
.end(); ++It
) {
4713 const VarDecl
*SharedVar
= It
->first
;
4714 RecordDecl
*CaptureRecord
= It
->second
->getParent();
4715 const ASTRecordLayout
&Layout
=
4716 CGF
.getContext().getASTRecordLayout(CaptureRecord
);
4718 Layout
.getFieldOffset(It
->second
->getFieldIndex()) / CharWidth
;
4719 if (CGF
.CGM
.getCodeGenOpts().hasReducedDebugInfo())
4720 (void)DI
->EmitDeclareOfAutoVariable(SharedVar
, ContextValue
,
4721 CGF
.Builder
, false);
4722 llvm::Instruction
&Last
= CGF
.Builder
.GetInsertBlock()->back();
4723 // Get the call dbg.declare instruction we just created and update
4724 // its DIExpression to add offset to base address.
4725 if (auto DDI
= dyn_cast
<llvm::DbgVariableIntrinsic
>(&Last
)) {
4726 SmallVector
<uint64_t, 8> Ops
;
4727 // Add offset to the base address if non zero.
4729 Ops
.push_back(llvm::dwarf::DW_OP_plus_uconst
);
4730 Ops
.push_back(Offset
);
4732 Ops
.push_back(llvm::dwarf::DW_OP_deref
);
4733 auto &Ctx
= DDI
->getContext();
4734 llvm::DIExpression
*DIExpr
= llvm::DIExpression::get(Ctx
, Ops
);
4735 Last
.setOperand(2, llvm::MetadataAsValue::get(Ctx
, DIExpr
));
4740 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> FirstprivatePtrs
;
4741 if (!Data
.PrivateVars
.empty() || !Data
.FirstprivateVars
.empty() ||
4742 !Data
.LastprivateVars
.empty() || !Data
.PrivateLocals
.empty()) {
4743 enum { PrivatesParam
= 2, CopyFnParam
= 3 };
4744 llvm::Value
*CopyFn
= CGF
.Builder
.CreateLoad(
4745 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(CopyFnParam
)));
4746 llvm::Value
*PrivatesPtr
= CGF
.Builder
.CreateLoad(CGF
.GetAddrOfLocalVar(
4747 CS
->getCapturedDecl()->getParam(PrivatesParam
)));
4749 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> PrivatePtrs
;
4750 llvm::SmallVector
<llvm::Value
*, 16> CallArgs
;
4751 llvm::SmallVector
<llvm::Type
*, 4> ParamTypes
;
4752 CallArgs
.push_back(PrivatesPtr
);
4753 ParamTypes
.push_back(PrivatesPtr
->getType());
4754 for (const Expr
*E
: Data
.PrivateVars
) {
4755 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4756 Address PrivatePtr
= CGF
.CreateMemTemp(
4757 CGF
.getContext().getPointerType(E
->getType()), ".priv.ptr.addr");
4758 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4759 CallArgs
.push_back(PrivatePtr
.getPointer());
4760 ParamTypes
.push_back(PrivatePtr
.getType());
4762 for (const Expr
*E
: Data
.FirstprivateVars
) {
4763 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4764 Address PrivatePtr
=
4765 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
4766 ".firstpriv.ptr.addr");
4767 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4768 FirstprivatePtrs
.emplace_back(VD
, PrivatePtr
);
4769 CallArgs
.push_back(PrivatePtr
.getPointer());
4770 ParamTypes
.push_back(PrivatePtr
.getType());
4772 for (const Expr
*E
: Data
.LastprivateVars
) {
4773 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4774 Address PrivatePtr
=
4775 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
4776 ".lastpriv.ptr.addr");
4777 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4778 CallArgs
.push_back(PrivatePtr
.getPointer());
4779 ParamTypes
.push_back(PrivatePtr
.getType());
4781 for (const VarDecl
*VD
: Data
.PrivateLocals
) {
4782 QualType Ty
= VD
->getType().getNonReferenceType();
4783 if (VD
->getType()->isLValueReferenceType())
4784 Ty
= CGF
.getContext().getPointerType(Ty
);
4785 if (isAllocatableDecl(VD
))
4786 Ty
= CGF
.getContext().getPointerType(Ty
);
4787 Address PrivatePtr
= CGF
.CreateMemTemp(
4788 CGF
.getContext().getPointerType(Ty
), ".local.ptr.addr");
4789 auto Result
= UntiedLocalVars
.insert(
4790 std::make_pair(VD
, std::make_pair(PrivatePtr
, Address::invalid())));
4791 // If key exists update in place.
4792 if (Result
.second
== false)
4793 *Result
.first
= std::make_pair(
4794 VD
, std::make_pair(PrivatePtr
, Address::invalid()));
4795 CallArgs
.push_back(PrivatePtr
.getPointer());
4796 ParamTypes
.push_back(PrivatePtr
.getType());
4798 auto *CopyFnTy
= llvm::FunctionType::get(CGF
.Builder
.getVoidTy(),
4799 ParamTypes
, /*isVarArg=*/false);
4800 CopyFn
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
4801 CopyFn
, CopyFnTy
->getPointerTo());
4802 CGF
.CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(
4803 CGF
, S
.getBeginLoc(), {CopyFnTy
, CopyFn
}, CallArgs
);
4804 for (const auto &Pair
: LastprivateDstsOrigs
) {
4805 const auto *OrigVD
= cast
<VarDecl
>(Pair
.second
->getDecl());
4806 DeclRefExpr
DRE(CGF
.getContext(), const_cast<VarDecl
*>(OrigVD
),
4807 /*RefersToEnclosingVariableOrCapture=*/
4808 CGF
.CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
4809 Pair
.second
->getType(), VK_LValue
,
4810 Pair
.second
->getExprLoc());
4811 Scope
.addPrivate(Pair
.first
, CGF
.EmitLValue(&DRE
).getAddress(CGF
));
4813 for (const auto &Pair
: PrivatePtrs
) {
4814 Address Replacement
= Address(
4815 CGF
.Builder
.CreateLoad(Pair
.second
),
4816 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
4817 CGF
.getContext().getDeclAlign(Pair
.first
));
4818 Scope
.addPrivate(Pair
.first
, Replacement
);
4819 if (auto *DI
= CGF
.getDebugInfo())
4820 if (CGF
.CGM
.getCodeGenOpts().hasReducedDebugInfo())
4821 (void)DI
->EmitDeclareOfAutoVariable(
4822 Pair
.first
, Pair
.second
.getPointer(), CGF
.Builder
,
4823 /*UsePointerValue*/ true);
4825 // Adjust mapping for internal locals by mapping actual memory instead of
4826 // a pointer to this memory.
4827 for (auto &Pair
: UntiedLocalVars
) {
4828 QualType VDType
= Pair
.first
->getType().getNonReferenceType();
4829 if (isAllocatableDecl(Pair
.first
)) {
4830 llvm::Value
*Ptr
= CGF
.Builder
.CreateLoad(Pair
.second
.first
);
4831 Address
Replacement(
4833 CGF
.ConvertTypeForMem(CGF
.getContext().getPointerType(VDType
)),
4834 CGF
.getPointerAlign());
4835 Pair
.second
.first
= Replacement
;
4836 Ptr
= CGF
.Builder
.CreateLoad(Replacement
);
4837 Replacement
= Address(Ptr
, CGF
.ConvertTypeForMem(VDType
),
4838 CGF
.getContext().getDeclAlign(Pair
.first
));
4839 Pair
.second
.second
= Replacement
;
4841 llvm::Value
*Ptr
= CGF
.Builder
.CreateLoad(Pair
.second
.first
);
4842 Address
Replacement(Ptr
, CGF
.ConvertTypeForMem(VDType
),
4843 CGF
.getContext().getDeclAlign(Pair
.first
));
4844 Pair
.second
.first
= Replacement
;
4848 if (Data
.Reductions
) {
4849 OMPPrivateScope
FirstprivateScope(CGF
);
4850 for (const auto &Pair
: FirstprivatePtrs
) {
4851 Address
Replacement(
4852 CGF
.Builder
.CreateLoad(Pair
.second
),
4853 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
4854 CGF
.getContext().getDeclAlign(Pair
.first
));
4855 FirstprivateScope
.addPrivate(Pair
.first
, Replacement
);
4857 (void)FirstprivateScope
.Privatize();
4858 OMPLexicalScope
LexScope(CGF
, S
, CapturedRegion
);
4859 ReductionCodeGen
RedCG(Data
.ReductionVars
, Data
.ReductionVars
,
4860 Data
.ReductionCopies
, Data
.ReductionOps
);
4861 llvm::Value
*ReductionsPtr
= CGF
.Builder
.CreateLoad(
4862 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(9)));
4863 for (unsigned Cnt
= 0, E
= Data
.ReductionVars
.size(); Cnt
< E
; ++Cnt
) {
4864 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
4865 RedCG
.emitAggregateType(CGF
, Cnt
);
4866 // FIXME: This must removed once the runtime library is fixed.
4867 // Emit required threadprivate variables for
4868 // initializer/combiner/finalizer.
4869 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
4871 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
4872 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
4874 Address(CGF
.EmitScalarConversion(
4875 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
4876 CGF
.getContext().getPointerType(
4877 Data
.ReductionCopies
[Cnt
]->getType()),
4878 Data
.ReductionCopies
[Cnt
]->getExprLoc()),
4879 CGF
.ConvertTypeForMem(Data
.ReductionCopies
[Cnt
]->getType()),
4880 Replacement
.getAlignment());
4881 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
4882 Scope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
4885 // Privatize all private variables except for in_reduction items.
4886 (void)Scope
.Privatize();
4887 SmallVector
<const Expr
*, 4> InRedVars
;
4888 SmallVector
<const Expr
*, 4> InRedPrivs
;
4889 SmallVector
<const Expr
*, 4> InRedOps
;
4890 SmallVector
<const Expr
*, 4> TaskgroupDescriptors
;
4891 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
4892 auto IPriv
= C
->privates().begin();
4893 auto IRed
= C
->reduction_ops().begin();
4894 auto ITD
= C
->taskgroup_descriptors().begin();
4895 for (const Expr
*Ref
: C
->varlists()) {
4896 InRedVars
.emplace_back(Ref
);
4897 InRedPrivs
.emplace_back(*IPriv
);
4898 InRedOps
.emplace_back(*IRed
);
4899 TaskgroupDescriptors
.emplace_back(*ITD
);
4900 std::advance(IPriv
, 1);
4901 std::advance(IRed
, 1);
4902 std::advance(ITD
, 1);
4905 // Privatize in_reduction items here, because taskgroup descriptors must be
4906 // privatized earlier.
4907 OMPPrivateScope
InRedScope(CGF
);
4908 if (!InRedVars
.empty()) {
4909 ReductionCodeGen
RedCG(InRedVars
, InRedVars
, InRedPrivs
, InRedOps
);
4910 for (unsigned Cnt
= 0, E
= InRedVars
.size(); Cnt
< E
; ++Cnt
) {
4911 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
4912 RedCG
.emitAggregateType(CGF
, Cnt
);
4913 // The taskgroup descriptor variable is always implicit firstprivate and
4914 // privatized already during processing of the firstprivates.
4915 // FIXME: This must removed once the runtime library is fixed.
4916 // Emit required threadprivate variables for
4917 // initializer/combiner/finalizer.
4918 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
4920 llvm::Value
*ReductionsPtr
;
4921 if (const Expr
*TRExpr
= TaskgroupDescriptors
[Cnt
]) {
4922 ReductionsPtr
= CGF
.EmitLoadOfScalar(CGF
.EmitLValue(TRExpr
),
4923 TRExpr
->getExprLoc());
4925 ReductionsPtr
= llvm::ConstantPointerNull::get(CGF
.VoidPtrTy
);
4927 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
4928 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
4929 Replacement
= Address(
4930 CGF
.EmitScalarConversion(
4931 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
4932 CGF
.getContext().getPointerType(InRedPrivs
[Cnt
]->getType()),
4933 InRedPrivs
[Cnt
]->getExprLoc()),
4934 CGF
.ConvertTypeForMem(InRedPrivs
[Cnt
]->getType()),
4935 Replacement
.getAlignment());
4936 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
4937 InRedScope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
4940 (void)InRedScope
.Privatize();
4942 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII
LocalVarsScope(CGF
,
4947 llvm::Function
*OutlinedFn
= CGM
.getOpenMPRuntime().emitTaskOutlinedFunction(
4948 S
, *I
, *PartId
, *TaskT
, S
.getDirectiveKind(), CodeGen
, Data
.Tied
,
4949 Data
.NumberOfParts
);
4950 OMPLexicalScope
Scope(*this, S
, std::nullopt
,
4951 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
4952 !isOpenMPSimdDirective(S
.getDirectiveKind()));
4953 TaskGen(*this, OutlinedFn
, Data
);
4956 static ImplicitParamDecl
*
4957 createImplicitFirstprivateForType(ASTContext
&C
, OMPTaskDataTy
&Data
,
4958 QualType Ty
, CapturedDecl
*CD
,
4959 SourceLocation Loc
) {
4960 auto *OrigVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, Ty
,
4961 ImplicitParamDecl::Other
);
4962 auto *OrigRef
= DeclRefExpr::Create(
4963 C
, NestedNameSpecifierLoc(), SourceLocation(), OrigVD
,
4964 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, Ty
, VK_LValue
);
4965 auto *PrivateVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, Ty
,
4966 ImplicitParamDecl::Other
);
4967 auto *PrivateRef
= DeclRefExpr::Create(
4968 C
, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD
,
4969 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, Ty
, VK_LValue
);
4970 QualType ElemType
= C
.getBaseElementType(Ty
);
4971 auto *InitVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, ElemType
,
4972 ImplicitParamDecl::Other
);
4973 auto *InitRef
= DeclRefExpr::Create(
4974 C
, NestedNameSpecifierLoc(), SourceLocation(), InitVD
,
4975 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, ElemType
, VK_LValue
);
4976 PrivateVD
->setInitStyle(VarDecl::CInit
);
4977 PrivateVD
->setInit(ImplicitCastExpr::Create(C
, ElemType
, CK_LValueToRValue
,
4978 InitRef
, /*BasePath=*/nullptr,
4979 VK_PRValue
, FPOptionsOverride()));
4980 Data
.FirstprivateVars
.emplace_back(OrigRef
);
4981 Data
.FirstprivateCopies
.emplace_back(PrivateRef
);
4982 Data
.FirstprivateInits
.emplace_back(InitRef
);
4986 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4987 const OMPExecutableDirective
&S
, const RegionCodeGenTy
&BodyGen
,
4988 OMPTargetDataInfo
&InputInfo
) {
4989 // Emit outlined function for task construct.
4990 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_task
);
4991 Address CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
4992 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
4993 auto I
= CS
->getCapturedDecl()->param_begin();
4994 auto PartId
= std::next(I
);
4995 auto TaskT
= std::next(I
, 4);
4997 // The task is not final.
4998 Data
.Final
.setInt(/*IntVal=*/false);
4999 // Get list of firstprivate variables.
5000 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
5001 auto IRef
= C
->varlist_begin();
5002 auto IElemInitRef
= C
->inits().begin();
5003 for (auto *IInit
: C
->private_copies()) {
5004 Data
.FirstprivateVars
.push_back(*IRef
);
5005 Data
.FirstprivateCopies
.push_back(IInit
);
5006 Data
.FirstprivateInits
.push_back(*IElemInitRef
);
5011 SmallVector
<const Expr
*, 4> LHSs
;
5012 SmallVector
<const Expr
*, 4> RHSs
;
5013 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
5014 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
5015 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
5016 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
5017 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
5018 C
->reduction_ops().end());
5019 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5020 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5022 OMPPrivateScope
TargetScope(*this);
5023 VarDecl
*BPVD
= nullptr;
5024 VarDecl
*PVD
= nullptr;
5025 VarDecl
*SVD
= nullptr;
5026 VarDecl
*MVD
= nullptr;
5027 if (InputInfo
.NumberOfTargetItems
> 0) {
5028 auto *CD
= CapturedDecl::Create(
5029 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5030 llvm::APInt
ArrSize(/*numBits=*/32, InputInfo
.NumberOfTargetItems
);
5031 QualType BaseAndPointerAndMapperType
= getContext().getConstantArrayType(
5032 getContext().VoidPtrTy
, ArrSize
, nullptr, ArrayType::Normal
,
5033 /*IndexTypeQuals=*/0);
5034 BPVD
= createImplicitFirstprivateForType(
5035 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5036 PVD
= createImplicitFirstprivateForType(
5037 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5038 QualType SizesType
= getContext().getConstantArrayType(
5039 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5040 ArrSize
, nullptr, ArrayType::Normal
,
5041 /*IndexTypeQuals=*/0);
5042 SVD
= createImplicitFirstprivateForType(getContext(), Data
, SizesType
, CD
,
5044 TargetScope
.addPrivate(BPVD
, InputInfo
.BasePointersArray
);
5045 TargetScope
.addPrivate(PVD
, InputInfo
.PointersArray
);
5046 TargetScope
.addPrivate(SVD
, InputInfo
.SizesArray
);
5047 // If there is no user-defined mapper, the mapper array will be nullptr. In
5048 // this case, we don't need to privatize it.
5049 if (!isa_and_nonnull
<llvm::ConstantPointerNull
>(
5050 InputInfo
.MappersArray
.getPointer())) {
5051 MVD
= createImplicitFirstprivateForType(
5052 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5053 TargetScope
.addPrivate(MVD
, InputInfo
.MappersArray
);
5056 (void)TargetScope
.Privatize();
5057 buildDependences(S
, Data
);
5058 auto &&CodeGen
= [&Data
, &S
, CS
, &BodyGen
, BPVD
, PVD
, SVD
, MVD
,
5059 &InputInfo
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
5060 // Set proper addresses for generated private copies.
5061 OMPPrivateScope
Scope(CGF
);
5062 if (!Data
.FirstprivateVars
.empty()) {
5063 enum { PrivatesParam
= 2, CopyFnParam
= 3 };
5064 llvm::Value
*CopyFn
= CGF
.Builder
.CreateLoad(
5065 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(CopyFnParam
)));
5066 llvm::Value
*PrivatesPtr
= CGF
.Builder
.CreateLoad(CGF
.GetAddrOfLocalVar(
5067 CS
->getCapturedDecl()->getParam(PrivatesParam
)));
5069 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> PrivatePtrs
;
5070 llvm::SmallVector
<llvm::Value
*, 16> CallArgs
;
5071 llvm::SmallVector
<llvm::Type
*, 4> ParamTypes
;
5072 CallArgs
.push_back(PrivatesPtr
);
5073 ParamTypes
.push_back(PrivatesPtr
->getType());
5074 for (const Expr
*E
: Data
.FirstprivateVars
) {
5075 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
5076 Address PrivatePtr
=
5077 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
5078 ".firstpriv.ptr.addr");
5079 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
5080 CallArgs
.push_back(PrivatePtr
.getPointer());
5081 ParamTypes
.push_back(PrivatePtr
.getType());
5083 auto *CopyFnTy
= llvm::FunctionType::get(CGF
.Builder
.getVoidTy(),
5084 ParamTypes
, /*isVarArg=*/false);
5085 CopyFn
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
5086 CopyFn
, CopyFnTy
->getPointerTo());
5087 CGF
.CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(
5088 CGF
, S
.getBeginLoc(), {CopyFnTy
, CopyFn
}, CallArgs
);
5089 for (const auto &Pair
: PrivatePtrs
) {
5090 Address
Replacement(
5091 CGF
.Builder
.CreateLoad(Pair
.second
),
5092 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
5093 CGF
.getContext().getDeclAlign(Pair
.first
));
5094 Scope
.addPrivate(Pair
.first
, Replacement
);
5097 CGF
.processInReduction(S
, Data
, CGF
, CS
, Scope
);
5098 if (InputInfo
.NumberOfTargetItems
> 0) {
5099 InputInfo
.BasePointersArray
= CGF
.Builder
.CreateConstArrayGEP(
5100 CGF
.GetAddrOfLocalVar(BPVD
), /*Index=*/0);
5101 InputInfo
.PointersArray
= CGF
.Builder
.CreateConstArrayGEP(
5102 CGF
.GetAddrOfLocalVar(PVD
), /*Index=*/0);
5103 InputInfo
.SizesArray
= CGF
.Builder
.CreateConstArrayGEP(
5104 CGF
.GetAddrOfLocalVar(SVD
), /*Index=*/0);
5105 // If MVD is nullptr, the mapper array is not privatized
5107 InputInfo
.MappersArray
= CGF
.Builder
.CreateConstArrayGEP(
5108 CGF
.GetAddrOfLocalVar(MVD
), /*Index=*/0);
5112 OMPLexicalScope
LexScope(CGF
, S
, OMPD_task
, /*EmitPreInitStmt=*/false);
5115 llvm::Function
*OutlinedFn
= CGM
.getOpenMPRuntime().emitTaskOutlinedFunction(
5116 S
, *I
, *PartId
, *TaskT
, S
.getDirectiveKind(), CodeGen
, /*Tied=*/true,
5117 Data
.NumberOfParts
);
5118 llvm::APInt
TrueOrFalse(32, S
.hasClausesOfKind
<OMPNowaitClause
>() ? 1 : 0);
5119 IntegerLiteral
IfCond(getContext(), TrueOrFalse
,
5120 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5122 CGM
.getOpenMPRuntime().emitTaskCall(*this, S
.getBeginLoc(), S
, OutlinedFn
,
5123 SharedsTy
, CapturedStruct
, &IfCond
, Data
);
5126 void CodeGenFunction::processInReduction(const OMPExecutableDirective
&S
,
5127 OMPTaskDataTy
&Data
,
5128 CodeGenFunction
&CGF
,
5129 const CapturedStmt
*CS
,
5130 OMPPrivateScope
&Scope
) {
5131 if (Data
.Reductions
) {
5132 OpenMPDirectiveKind CapturedRegion
= S
.getDirectiveKind();
5133 OMPLexicalScope
LexScope(CGF
, S
, CapturedRegion
);
5134 ReductionCodeGen
RedCG(Data
.ReductionVars
, Data
.ReductionVars
,
5135 Data
.ReductionCopies
, Data
.ReductionOps
);
5136 llvm::Value
*ReductionsPtr
= CGF
.Builder
.CreateLoad(
5137 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(4)));
5138 for (unsigned Cnt
= 0, E
= Data
.ReductionVars
.size(); Cnt
< E
; ++Cnt
) {
5139 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
5140 RedCG
.emitAggregateType(CGF
, Cnt
);
5141 // FIXME: This must removed once the runtime library is fixed.
5142 // Emit required threadprivate variables for
5143 // initializer/combiner/finalizer.
5144 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
5146 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5147 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
5149 Address(CGF
.EmitScalarConversion(
5150 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
5151 CGF
.getContext().getPointerType(
5152 Data
.ReductionCopies
[Cnt
]->getType()),
5153 Data
.ReductionCopies
[Cnt
]->getExprLoc()),
5154 CGF
.ConvertTypeForMem(Data
.ReductionCopies
[Cnt
]->getType()),
5155 Replacement
.getAlignment());
5156 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5157 Scope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5160 (void)Scope
.Privatize();
5161 SmallVector
<const Expr
*, 4> InRedVars
;
5162 SmallVector
<const Expr
*, 4> InRedPrivs
;
5163 SmallVector
<const Expr
*, 4> InRedOps
;
5164 SmallVector
<const Expr
*, 4> TaskgroupDescriptors
;
5165 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
5166 auto IPriv
= C
->privates().begin();
5167 auto IRed
= C
->reduction_ops().begin();
5168 auto ITD
= C
->taskgroup_descriptors().begin();
5169 for (const Expr
*Ref
: C
->varlists()) {
5170 InRedVars
.emplace_back(Ref
);
5171 InRedPrivs
.emplace_back(*IPriv
);
5172 InRedOps
.emplace_back(*IRed
);
5173 TaskgroupDescriptors
.emplace_back(*ITD
);
5174 std::advance(IPriv
, 1);
5175 std::advance(IRed
, 1);
5176 std::advance(ITD
, 1);
5179 OMPPrivateScope
InRedScope(CGF
);
5180 if (!InRedVars
.empty()) {
5181 ReductionCodeGen
RedCG(InRedVars
, InRedVars
, InRedPrivs
, InRedOps
);
5182 for (unsigned Cnt
= 0, E
= InRedVars
.size(); Cnt
< E
; ++Cnt
) {
5183 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
5184 RedCG
.emitAggregateType(CGF
, Cnt
);
5185 // FIXME: This must removed once the runtime library is fixed.
5186 // Emit required threadprivate variables for
5187 // initializer/combiner/finalizer.
5188 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
5190 llvm::Value
*ReductionsPtr
;
5191 if (const Expr
*TRExpr
= TaskgroupDescriptors
[Cnt
]) {
5193 CGF
.EmitLoadOfScalar(CGF
.EmitLValue(TRExpr
), TRExpr
->getExprLoc());
5195 ReductionsPtr
= llvm::ConstantPointerNull::get(CGF
.VoidPtrTy
);
5197 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5198 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
5199 Replacement
= Address(
5200 CGF
.EmitScalarConversion(
5201 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
5202 CGF
.getContext().getPointerType(InRedPrivs
[Cnt
]->getType()),
5203 InRedPrivs
[Cnt
]->getExprLoc()),
5204 CGF
.ConvertTypeForMem(InRedPrivs
[Cnt
]->getType()),
5205 Replacement
.getAlignment());
5206 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5207 InRedScope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5210 (void)InRedScope
.Privatize();
5213 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective
&S
) {
5214 // Emit outlined function for task construct.
5215 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_task
);
5216 Address CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
5217 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
5218 const Expr
*IfCond
= nullptr;
5219 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
5220 if (C
->getNameModifier() == OMPD_unknown
||
5221 C
->getNameModifier() == OMPD_task
) {
5222 IfCond
= C
->getCondition();
5228 // Check if we should emit tied or untied task.
5229 Data
.Tied
= !S
.getSingleClause
<OMPUntiedClause
>();
5230 auto &&BodyGen
= [CS
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5231 CGF
.EmitStmt(CS
->getCapturedStmt());
5233 auto &&TaskGen
= [&S
, SharedsTy
, CapturedStruct
,
5234 IfCond
](CodeGenFunction
&CGF
, llvm::Function
*OutlinedFn
,
5235 const OMPTaskDataTy
&Data
) {
5236 CGF
.CGM
.getOpenMPRuntime().emitTaskCall(CGF
, S
.getBeginLoc(), S
, OutlinedFn
,
5237 SharedsTy
, CapturedStruct
, IfCond
,
5241 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
5242 EmitOMPTaskBasedDirective(S
, OMPD_task
, BodyGen
, TaskGen
, Data
);
5245 void CodeGenFunction::EmitOMPTaskyieldDirective(
5246 const OMPTaskyieldDirective
&S
) {
5247 CGM
.getOpenMPRuntime().emitTaskyieldCall(*this, S
.getBeginLoc());
5250 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective
&S
) {
5251 const OMPMessageClause
*MC
= S
.getSingleClause
<OMPMessageClause
>();
5252 Expr
*ME
= MC
? MC
->getMessageString() : nullptr;
5253 const OMPSeverityClause
*SC
= S
.getSingleClause
<OMPSeverityClause
>();
5254 bool IsFatal
= false;
5255 if (!SC
|| SC
->getSeverityKind() == OMPC_SEVERITY_fatal
)
5257 CGM
.getOpenMPRuntime().emitErrorCall(*this, S
.getBeginLoc(), ME
, IsFatal
);
5260 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective
&S
) {
5261 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_barrier
);
5264 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective
&S
) {
5266 // Build list of dependences
5267 buildDependences(S
, Data
);
5268 Data
.HasNowaitClause
= S
.hasClausesOfKind
<OMPNowaitClause
>();
5269 CGM
.getOpenMPRuntime().emitTaskwaitCall(*this, S
.getBeginLoc(), Data
);
5272 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective
&T
) {
5273 return T
.clauses().empty();
5276 void CodeGenFunction::EmitOMPTaskgroupDirective(
5277 const OMPTaskgroupDirective
&S
) {
5278 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5279 if (CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
)) {
5280 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
5281 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
5282 InsertPointTy
AllocaIP(AllocaInsertPt
->getParent(),
5283 AllocaInsertPt
->getIterator());
5285 auto BodyGenCB
= [&, this](InsertPointTy AllocaIP
,
5286 InsertPointTy CodeGenIP
) {
5287 Builder
.restoreIP(CodeGenIP
);
5288 EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
5290 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo
;
5291 if (!CapturedStmtInfo
)
5292 CapturedStmtInfo
= &CapStmtInfo
;
5293 Builder
.restoreIP(OMPBuilder
.createTaskgroup(Builder
, AllocaIP
, BodyGenCB
));
5296 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
5298 if (const Expr
*E
= S
.getReductionRef()) {
5299 SmallVector
<const Expr
*, 4> LHSs
;
5300 SmallVector
<const Expr
*, 4> RHSs
;
5302 for (const auto *C
: S
.getClausesOfKind
<OMPTaskReductionClause
>()) {
5303 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
5304 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
5305 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
5306 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
5307 C
->reduction_ops().end());
5308 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5309 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5311 llvm::Value
*ReductionDesc
=
5312 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionInit(CGF
, S
.getBeginLoc(),
5314 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
5315 CGF
.EmitVarDecl(*VD
);
5316 CGF
.EmitStoreOfScalar(ReductionDesc
, CGF
.GetAddrOfLocalVar(VD
),
5317 /*Volatile=*/false, E
->getType());
5319 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
5321 CGM
.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen
, S
.getBeginLoc());
5324 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective
&S
) {
5325 llvm::AtomicOrdering AO
= S
.getSingleClause
<OMPFlushClause
>()
5326 ? llvm::AtomicOrdering::NotAtomic
5327 : llvm::AtomicOrdering::AcquireRelease
;
5328 CGM
.getOpenMPRuntime().emitFlush(
5330 [&S
]() -> ArrayRef
<const Expr
*> {
5331 if (const auto *FlushClause
= S
.getSingleClause
<OMPFlushClause
>())
5332 return llvm::ArrayRef(FlushClause
->varlist_begin(),
5333 FlushClause
->varlist_end());
5334 return std::nullopt
;
5336 S
.getBeginLoc(), AO
);
5339 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective
&S
) {
5340 const auto *DO
= S
.getSingleClause
<OMPDepobjClause
>();
5341 LValue DOLVal
= EmitLValue(DO
->getDepobj());
5342 if (const auto *DC
= S
.getSingleClause
<OMPDependClause
>()) {
5343 OMPTaskDataTy::DependData
Dependencies(DC
->getDependencyKind(),
5345 Dependencies
.DepExprs
.append(DC
->varlist_begin(), DC
->varlist_end());
5346 Address DepAddr
= CGM
.getOpenMPRuntime().emitDepobjDependClause(
5347 *this, Dependencies
, DC
->getBeginLoc());
5348 EmitStoreOfScalar(DepAddr
.getPointer(), DOLVal
);
5351 if (const auto *DC
= S
.getSingleClause
<OMPDestroyClause
>()) {
5352 CGM
.getOpenMPRuntime().emitDestroyClause(*this, DOLVal
, DC
->getBeginLoc());
5355 if (const auto *UC
= S
.getSingleClause
<OMPUpdateClause
>()) {
5356 CGM
.getOpenMPRuntime().emitUpdateClause(
5357 *this, DOLVal
, UC
->getDependencyKind(), UC
->getBeginLoc());
5362 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective
&S
) {
5363 if (!OMPParentLoopDirectiveForScan
)
5365 const OMPExecutableDirective
&ParentDir
= *OMPParentLoopDirectiveForScan
;
5366 bool IsInclusive
= S
.hasClausesOfKind
<OMPInclusiveClause
>();
5367 SmallVector
<const Expr
*, 4> Shareds
;
5368 SmallVector
<const Expr
*, 4> Privates
;
5369 SmallVector
<const Expr
*, 4> LHSs
;
5370 SmallVector
<const Expr
*, 4> RHSs
;
5371 SmallVector
<const Expr
*, 4> ReductionOps
;
5372 SmallVector
<const Expr
*, 4> CopyOps
;
5373 SmallVector
<const Expr
*, 4> CopyArrayTemps
;
5374 SmallVector
<const Expr
*, 4> CopyArrayElems
;
5375 for (const auto *C
: ParentDir
.getClausesOfKind
<OMPReductionClause
>()) {
5376 if (C
->getModifier() != OMPC_REDUCTION_inscan
)
5378 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
5379 Privates
.append(C
->privates().begin(), C
->privates().end());
5380 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5381 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5382 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
5383 CopyOps
.append(C
->copy_ops().begin(), C
->copy_ops().end());
5384 CopyArrayTemps
.append(C
->copy_array_temps().begin(),
5385 C
->copy_array_temps().end());
5386 CopyArrayElems
.append(C
->copy_array_elems().begin(),
5387 C
->copy_array_elems().end());
5389 if (ParentDir
.getDirectiveKind() == OMPD_simd
||
5390 (getLangOpts().OpenMPSimd
&&
5391 isOpenMPSimdDirective(ParentDir
.getDirectiveKind()))) {
5392 // For simd directive and simd-based directives in simd only mode, use the
5393 // following codegen:
5395 // #pragma omp simd reduction(inscan, +: x)
5398 // #pragma omp scan inclusive(x)
5401 // is transformed to:
5412 // #pragma omp simd reduction(inscan, +: x)
5415 // #pragma omp scan exclusive(x)
5428 llvm::BasicBlock
*OMPScanReduce
= createBasicBlock("omp.inscan.reduce");
5429 EmitBranch(IsInclusive
5431 : BreakContinueStack
.back().ContinueBlock
.getBlock());
5432 EmitBlock(OMPScanDispatch
);
5434 // New scope for correct construction/destruction of temp variables for
5436 LexicalScope
Scope(*this, S
.getSourceRange());
5437 EmitBranch(IsInclusive
? OMPBeforeScanBlock
: OMPAfterScanBlock
);
5438 EmitBlock(OMPScanReduce
);
5440 // Create temp var and copy LHS value to this temp value.
5442 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5443 const Expr
*PrivateExpr
= Privates
[I
];
5444 const Expr
*TempExpr
= CopyArrayTemps
[I
];
5446 *cast
<VarDecl
>(cast
<DeclRefExpr
>(TempExpr
)->getDecl()));
5447 LValue DestLVal
= EmitLValue(TempExpr
);
5448 LValue SrcLVal
= EmitLValue(LHSs
[I
]);
5449 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5450 SrcLVal
.getAddress(*this),
5451 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5452 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5456 CGM
.getOpenMPRuntime().emitReduction(
5457 *this, ParentDir
.getEndLoc(), Privates
, LHSs
, RHSs
, ReductionOps
,
5458 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd
});
5459 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5460 const Expr
*PrivateExpr
= Privates
[I
];
5464 DestLVal
= EmitLValue(RHSs
[I
]);
5465 SrcLVal
= EmitLValue(LHSs
[I
]);
5467 const Expr
*TempExpr
= CopyArrayTemps
[I
];
5468 DestLVal
= EmitLValue(RHSs
[I
]);
5469 SrcLVal
= EmitLValue(TempExpr
);
5471 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5472 SrcLVal
.getAddress(*this),
5473 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5474 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5478 EmitBranch(IsInclusive
? OMPAfterScanBlock
: OMPBeforeScanBlock
);
5479 OMPScanExitBlock
= IsInclusive
5480 ? BreakContinueStack
.back().ContinueBlock
.getBlock()
5482 EmitBlock(OMPAfterScanBlock
);
5486 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5487 EmitBlock(OMPScanExitBlock
);
5489 if (OMPFirstScanLoop
) {
5490 // Emit buffer[i] = red; at the end of the input phase.
5491 const auto *IVExpr
= cast
<OMPLoopDirective
>(ParentDir
)
5492 .getIterationVariable()
5493 ->IgnoreParenImpCasts();
5494 LValue IdxLVal
= EmitLValue(IVExpr
);
5495 llvm::Value
*IdxVal
= EmitLoadOfScalar(IdxLVal
, IVExpr
->getExprLoc());
5496 IdxVal
= Builder
.CreateIntCast(IdxVal
, SizeTy
, /*isSigned=*/false);
5497 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5498 const Expr
*PrivateExpr
= Privates
[I
];
5499 const Expr
*OrigExpr
= Shareds
[I
];
5500 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
5501 OpaqueValueMapping
IdxMapping(
5503 cast
<OpaqueValueExpr
>(
5504 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
5505 RValue::get(IdxVal
));
5506 LValue DestLVal
= EmitLValue(CopyArrayElem
);
5507 LValue SrcLVal
= EmitLValue(OrigExpr
);
5508 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5509 SrcLVal
.getAddress(*this),
5510 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5511 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5515 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5517 EmitBlock(OMPScanExitBlock
);
5518 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5520 EmitBlock(OMPScanDispatch
);
5521 if (!OMPFirstScanLoop
) {
5522 // Emit red = buffer[i]; at the entrance to the scan phase.
5523 const auto *IVExpr
= cast
<OMPLoopDirective
>(ParentDir
)
5524 .getIterationVariable()
5525 ->IgnoreParenImpCasts();
5526 LValue IdxLVal
= EmitLValue(IVExpr
);
5527 llvm::Value
*IdxVal
= EmitLoadOfScalar(IdxLVal
, IVExpr
->getExprLoc());
5528 IdxVal
= Builder
.CreateIntCast(IdxVal
, SizeTy
, /*isSigned=*/false);
5529 llvm::BasicBlock
*ExclusiveExitBB
= nullptr;
5531 llvm::BasicBlock
*ContBB
= createBasicBlock("omp.exclusive.dec");
5532 ExclusiveExitBB
= createBasicBlock("omp.exclusive.copy.exit");
5533 llvm::Value
*Cmp
= Builder
.CreateIsNull(IdxVal
);
5534 Builder
.CreateCondBr(Cmp
, ExclusiveExitBB
, ContBB
);
5536 // Use idx - 1 iteration for exclusive scan.
5537 IdxVal
= Builder
.CreateNUWSub(IdxVal
, llvm::ConstantInt::get(SizeTy
, 1));
5539 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5540 const Expr
*PrivateExpr
= Privates
[I
];
5541 const Expr
*OrigExpr
= Shareds
[I
];
5542 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
5543 OpaqueValueMapping
IdxMapping(
5545 cast
<OpaqueValueExpr
>(
5546 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
5547 RValue::get(IdxVal
));
5548 LValue SrcLVal
= EmitLValue(CopyArrayElem
);
5549 LValue DestLVal
= EmitLValue(OrigExpr
);
5550 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5551 SrcLVal
.getAddress(*this),
5552 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5553 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5557 EmitBlock(ExclusiveExitBB
);
5560 EmitBranch((OMPFirstScanLoop
== IsInclusive
) ? OMPBeforeScanBlock
5561 : OMPAfterScanBlock
);
5562 EmitBlock(OMPAfterScanBlock
);
5565 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective
&S
,
5566 const CodeGenLoopTy
&CodeGenLoop
,
5568 // Emit the loop iteration variable.
5569 const auto *IVExpr
= cast
<DeclRefExpr
>(S
.getIterationVariable());
5570 const auto *IVDecl
= cast
<VarDecl
>(IVExpr
->getDecl());
5571 EmitVarDecl(*IVDecl
);
5573 // Emit the iterations count variable.
5574 // If it is not a variable, Sema decided to calculate iterations count on each
5575 // iteration (e.g., it is foldable into a constant).
5576 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
5577 EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
5578 // Emit calculation of the iterations count.
5579 EmitIgnoredExpr(S
.getCalcLastIteration());
5582 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
5584 bool HasLastprivateClause
= false;
5585 // Check pre-condition.
5587 OMPLoopScope
PreInitScope(*this, S
);
5588 // Skip the entire loop if we don't meet the precondition.
5589 // If the condition constant folds and can be elided, avoid emitting the
5592 llvm::BasicBlock
*ContBlock
= nullptr;
5593 if (ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
5597 llvm::BasicBlock
*ThenBlock
= createBasicBlock("omp.precond.then");
5598 ContBlock
= createBasicBlock("omp.precond.end");
5599 emitPreCond(*this, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
5600 getProfileCount(&S
));
5601 EmitBlock(ThenBlock
);
5602 incrementProfileCounter(&S
);
5605 emitAlignedClause(*this, S
);
5606 // Emit 'then' code.
5608 // Emit helper vars inits.
5610 LValue LB
= EmitOMPHelperVar(
5611 *this, cast
<DeclRefExpr
>(
5612 (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5613 ? S
.getCombinedLowerBoundVariable()
5614 : S
.getLowerBoundVariable())));
5615 LValue UB
= EmitOMPHelperVar(
5616 *this, cast
<DeclRefExpr
>(
5617 (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5618 ? S
.getCombinedUpperBoundVariable()
5619 : S
.getUpperBoundVariable())));
5621 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getStrideVariable()));
5623 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()));
5625 OMPPrivateScope
LoopScope(*this);
5626 if (EmitOMPFirstprivateClause(S
, LoopScope
)) {
5627 // Emit implicit barrier to synchronize threads and avoid data races
5628 // on initialization of firstprivate variables and post-update of
5629 // lastprivate variables.
5630 CGM
.getOpenMPRuntime().emitBarrierCall(
5631 *this, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
5632 /*ForceSimpleCall=*/true);
5634 EmitOMPPrivateClause(S
, LoopScope
);
5635 if (isOpenMPSimdDirective(S
.getDirectiveKind()) &&
5636 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
5637 !isOpenMPTeamsDirective(S
.getDirectiveKind()))
5638 EmitOMPReductionClauseInit(S
, LoopScope
);
5639 HasLastprivateClause
= EmitOMPLastprivateClauseInit(S
, LoopScope
);
5640 EmitOMPPrivateLoopCounters(S
, LoopScope
);
5641 (void)LoopScope
.Privatize();
5642 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
5643 CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S
);
5645 // Detect the distribute schedule kind and chunk.
5646 llvm::Value
*Chunk
= nullptr;
5647 OpenMPDistScheduleClauseKind ScheduleKind
= OMPC_DIST_SCHEDULE_unknown
;
5648 if (const auto *C
= S
.getSingleClause
<OMPDistScheduleClause
>()) {
5649 ScheduleKind
= C
->getDistScheduleKind();
5650 if (const Expr
*Ch
= C
->getChunkSize()) {
5651 Chunk
= EmitScalarExpr(Ch
);
5652 Chunk
= EmitScalarConversion(Chunk
, Ch
->getType(),
5653 S
.getIterationVariable()->getType(),
5657 // Default behaviour for dist_schedule clause.
5658 CGM
.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5659 *this, S
, ScheduleKind
, Chunk
);
5661 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
5662 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
5664 // OpenMP [2.10.8, distribute Construct, Description]
5665 // If dist_schedule is specified, kind must be static. If specified,
5666 // iterations are divided into chunks of size chunk_size, chunks are
5667 // assigned to the teams of the league in a round-robin fashion in the
5668 // order of the team number. When no chunk_size is specified, the
5669 // iteration space is divided into chunks that are approximately equal
5670 // in size, and at most one chunk is distributed to each team of the
5671 // league. The size of the chunks is unspecified in this case.
5672 bool StaticChunked
=
5673 RT
.isStaticChunked(ScheduleKind
, /* Chunked */ Chunk
!= nullptr) &&
5674 isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind());
5675 if (RT
.isStaticNonchunked(ScheduleKind
,
5676 /* Chunked */ Chunk
!= nullptr) ||
5678 CGOpenMPRuntime::StaticRTInput
StaticInit(
5679 IVSize
, IVSigned
, /* Ordered = */ false, IL
.getAddress(*this),
5680 LB
.getAddress(*this), UB
.getAddress(*this), ST
.getAddress(*this),
5681 StaticChunked
? Chunk
: nullptr);
5682 RT
.emitDistributeStaticInit(*this, S
.getBeginLoc(), ScheduleKind
,
5685 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5686 // UB = min(UB, GlobalUB);
5687 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5688 ? S
.getCombinedEnsureUpperBound()
5689 : S
.getEnsureUpperBound());
5691 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5692 ? S
.getCombinedInit()
5696 isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5697 ? S
.getCombinedCond()
5701 Cond
= S
.getCombinedDistCond();
5703 // For static unchunked schedules generate:
5705 // 1. For distribute alone, codegen
5706 // while (idx <= UB) {
5711 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5712 // while (idx <= UB) {
5713 // <CodeGen rest of pragma>(LB, UB);
5717 // For static chunk one schedule generate:
5719 // while (IV <= GlobalUB) {
5720 // <CodeGen rest of pragma>(LB, UB);
5723 // UB = min(UB, GlobalUB);
5729 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5730 if (isOpenMPSimdDirective(S
.getDirectiveKind()))
5731 CGF
.EmitOMPSimdInit(S
);
5733 [&S
, &LoopScope
, Cond
, IncExpr
, LoopExit
, &CodeGenLoop
,
5734 StaticChunked
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5735 CGF
.EmitOMPInnerLoop(
5736 S
, LoopScope
.requiresCleanups(), Cond
, IncExpr
,
5737 [&S
, LoopExit
, &CodeGenLoop
](CodeGenFunction
&CGF
) {
5738 CodeGenLoop(CGF
, S
, LoopExit
);
5740 [&S
, StaticChunked
](CodeGenFunction
&CGF
) {
5741 if (StaticChunked
) {
5742 CGF
.EmitIgnoredExpr(S
.getCombinedNextLowerBound());
5743 CGF
.EmitIgnoredExpr(S
.getCombinedNextUpperBound());
5744 CGF
.EmitIgnoredExpr(S
.getCombinedEnsureUpperBound());
5745 CGF
.EmitIgnoredExpr(S
.getCombinedInit());
5749 EmitBlock(LoopExit
.getBlock());
5750 // Tell the runtime we are done.
5751 RT
.emitForStaticFinish(*this, S
.getEndLoc(), S
.getDirectiveKind());
5753 // Emit the outer loop, which requests its work chunk [LB..UB] from
5754 // runtime and runs the inner loop to process it.
5755 const OMPLoopArguments LoopArguments
= {
5756 LB
.getAddress(*this), UB
.getAddress(*this), ST
.getAddress(*this),
5757 IL
.getAddress(*this), Chunk
};
5758 EmitOMPDistributeOuterLoop(ScheduleKind
, S
, LoopScope
, LoopArguments
,
5761 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
5762 EmitOMPSimdFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
5763 return CGF
.Builder
.CreateIsNotNull(
5764 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
5767 if (isOpenMPSimdDirective(S
.getDirectiveKind()) &&
5768 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
5769 !isOpenMPTeamsDirective(S
.getDirectiveKind())) {
5770 EmitOMPReductionClauseFinal(S
, OMPD_simd
);
5771 // Emit post-update of the reduction variables if IsLastIter != 0.
5772 emitPostUpdateForReductionClause(
5773 *this, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
5774 return CGF
.Builder
.CreateIsNotNull(
5775 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
5778 // Emit final copy of the lastprivate variables if IsLastIter != 0.
5779 if (HasLastprivateClause
) {
5780 EmitOMPLastprivateClauseFinal(
5781 S
, /*NoFinals=*/false,
5782 Builder
.CreateIsNotNull(EmitLoadOfScalar(IL
, S
.getBeginLoc())));
5786 // We're now done with the loop, so jump to the continuation block.
5788 EmitBranch(ContBlock
);
5789 EmitBlock(ContBlock
, true);
5794 void CodeGenFunction::EmitOMPDistributeDirective(
5795 const OMPDistributeDirective
&S
) {
5796 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5797 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
5799 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5800 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute
, CodeGen
);
5803 static llvm::Function
*emitOutlinedOrderedFunction(CodeGenModule
&CGM
,
5804 const CapturedStmt
*S
,
5805 SourceLocation Loc
) {
5806 CodeGenFunction
CGF(CGM
, /*suppressNewContext=*/true);
5807 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo
;
5808 CGF
.CapturedStmtInfo
= &CapStmtInfo
;
5809 llvm::Function
*Fn
= CGF
.GenerateOpenMPCapturedStmtFunction(*S
, Loc
);
5810 Fn
->setDoesNotRecurse();
5814 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective
&S
) {
5815 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
5816 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
5817 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
5819 if (S
.hasClausesOfKind
<OMPDependClause
>()) {
5820 // The ordered directive with depend clause.
5821 assert(!S
.hasAssociatedStmt() &&
5822 "No associated statement must be in ordered depend construct.");
5823 InsertPointTy
AllocaIP(AllocaInsertPt
->getParent(),
5824 AllocaInsertPt
->getIterator());
5825 for (const auto *DC
: S
.getClausesOfKind
<OMPDependClause
>()) {
5826 unsigned NumLoops
= DC
->getNumLoops();
5827 QualType Int64Ty
= CGM
.getContext().getIntTypeForBitwidth(
5828 /*DestWidth=*/64, /*Signed=*/1);
5829 llvm::SmallVector
<llvm::Value
*> StoreValues
;
5830 for (unsigned I
= 0; I
< NumLoops
; I
++) {
5831 const Expr
*CounterVal
= DC
->getLoopData(I
);
5833 llvm::Value
*StoreValue
= EmitScalarConversion(
5834 EmitScalarExpr(CounterVal
), CounterVal
->getType(), Int64Ty
,
5835 CounterVal
->getExprLoc());
5836 StoreValues
.emplace_back(StoreValue
);
5838 bool IsDependSource
= false;
5839 if (DC
->getDependencyKind() == OMPC_DEPEND_source
)
5840 IsDependSource
= true;
5841 Builder
.restoreIP(OMPBuilder
.createOrderedDepend(
5842 Builder
, AllocaIP
, NumLoops
, StoreValues
, ".cnt.addr",
5846 // The ordered directive with threads or simd clause, or without clause.
5847 // Without clause, it behaves as if the threads clause is specified.
5848 const auto *C
= S
.getSingleClause
<OMPSIMDClause
>();
5850 auto FiniCB
= [this](InsertPointTy IP
) {
5851 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
5854 auto BodyGenCB
= [&S
, C
, this](InsertPointTy AllocaIP
,
5855 InsertPointTy CodeGenIP
) {
5856 Builder
.restoreIP(CodeGenIP
);
5858 const CapturedStmt
*CS
= S
.getInnermostCapturedStmt();
5860 llvm::BasicBlock
*FiniBB
= splitBBWithSuffix(
5861 Builder
, /*CreateBranch=*/false, ".ordered.after");
5862 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
5863 GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
5864 llvm::Function
*OutlinedFn
=
5865 emitOutlinedOrderedFunction(CGM
, CS
, S
.getBeginLoc());
5866 assert(S
.getBeginLoc().isValid() &&
5867 "Outlined function call location must be valid.");
5868 ApplyDebugLocation::CreateDefaultArtificial(*this, S
.getBeginLoc());
5869 OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP
, *FiniBB
,
5870 OutlinedFn
, CapturedVars
);
5872 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5873 *this, CS
->getCapturedStmt(), AllocaIP
, CodeGenIP
, "ordered");
5877 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5879 OMPBuilder
.createOrderedThreadsSimd(Builder
, BodyGenCB
, FiniCB
, !C
));
5884 if (S
.hasClausesOfKind
<OMPDependClause
>()) {
5885 assert(!S
.hasAssociatedStmt() &&
5886 "No associated statement must be in ordered depend construct.");
5887 for (const auto *DC
: S
.getClausesOfKind
<OMPDependClause
>())
5888 CGM
.getOpenMPRuntime().emitDoacrossOrdered(*this, DC
);
5891 const auto *C
= S
.getSingleClause
<OMPSIMDClause
>();
5892 auto &&CodeGen
= [&S
, C
, this](CodeGenFunction
&CGF
,
5893 PrePostActionTy
&Action
) {
5894 const CapturedStmt
*CS
= S
.getInnermostCapturedStmt();
5896 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
5897 CGF
.GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
5898 llvm::Function
*OutlinedFn
=
5899 emitOutlinedOrderedFunction(CGM
, CS
, S
.getBeginLoc());
5900 CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(CGF
, S
.getBeginLoc(),
5901 OutlinedFn
, CapturedVars
);
5904 CGF
.EmitStmt(CS
->getCapturedStmt());
5907 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5908 CGM
.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen
, S
.getBeginLoc(), !C
);
5911 static llvm::Value
*convertToScalarValue(CodeGenFunction
&CGF
, RValue Val
,
5912 QualType SrcType
, QualType DestType
,
5913 SourceLocation Loc
) {
5914 assert(CGF
.hasScalarEvaluationKind(DestType
) &&
5915 "DestType must have scalar evaluation kind.");
5916 assert(!Val
.isAggregate() && "Must be a scalar or complex.");
5917 return Val
.isScalar() ? CGF
.EmitScalarConversion(Val
.getScalarVal(), SrcType
,
5919 : CGF
.EmitComplexToScalarConversion(
5920 Val
.getComplexVal(), SrcType
, DestType
, Loc
);
5923 static CodeGenFunction::ComplexPairTy
5924 convertToComplexValue(CodeGenFunction
&CGF
, RValue Val
, QualType SrcType
,
5925 QualType DestType
, SourceLocation Loc
) {
5926 assert(CGF
.getEvaluationKind(DestType
) == TEK_Complex
&&
5927 "DestType must have complex evaluation kind.");
5928 CodeGenFunction::ComplexPairTy ComplexVal
;
5929 if (Val
.isScalar()) {
5930 // Convert the input element to the element type of the complex.
5931 QualType DestElementType
=
5932 DestType
->castAs
<ComplexType
>()->getElementType();
5933 llvm::Value
*ScalarVal
= CGF
.EmitScalarConversion(
5934 Val
.getScalarVal(), SrcType
, DestElementType
, Loc
);
5935 ComplexVal
= CodeGenFunction::ComplexPairTy(
5936 ScalarVal
, llvm::Constant::getNullValue(ScalarVal
->getType()));
5938 assert(Val
.isComplex() && "Must be a scalar or complex.");
5939 QualType SrcElementType
= SrcType
->castAs
<ComplexType
>()->getElementType();
5940 QualType DestElementType
=
5941 DestType
->castAs
<ComplexType
>()->getElementType();
5942 ComplexVal
.first
= CGF
.EmitScalarConversion(
5943 Val
.getComplexVal().first
, SrcElementType
, DestElementType
, Loc
);
5944 ComplexVal
.second
= CGF
.EmitScalarConversion(
5945 Val
.getComplexVal().second
, SrcElementType
, DestElementType
, Loc
);
5950 static void emitSimpleAtomicStore(CodeGenFunction
&CGF
, llvm::AtomicOrdering AO
,
5951 LValue LVal
, RValue RVal
) {
5952 if (LVal
.isGlobalReg())
5953 CGF
.EmitStoreThroughGlobalRegLValue(RVal
, LVal
);
5955 CGF
.EmitAtomicStore(RVal
, LVal
, AO
, LVal
.isVolatile(), /*isInit=*/false);
5958 static RValue
emitSimpleAtomicLoad(CodeGenFunction
&CGF
,
5959 llvm::AtomicOrdering AO
, LValue LVal
,
5960 SourceLocation Loc
) {
5961 if (LVal
.isGlobalReg())
5962 return CGF
.EmitLoadOfLValue(LVal
, Loc
);
5963 return CGF
.EmitAtomicLoad(
5964 LVal
, Loc
, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO
),
5968 void CodeGenFunction::emitOMPSimpleStore(LValue LVal
, RValue RVal
,
5969 QualType RValTy
, SourceLocation Loc
) {
5970 switch (getEvaluationKind(LVal
.getType())) {
5972 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
5973 *this, RVal
, RValTy
, LVal
.getType(), Loc
)),
5978 convertToComplexValue(*this, RVal
, RValTy
, LVal
.getType(), Loc
), LVal
,
5982 llvm_unreachable("Must be a scalar or complex.");
5986 static void emitOMPAtomicReadExpr(CodeGenFunction
&CGF
, llvm::AtomicOrdering AO
,
5987 const Expr
*X
, const Expr
*V
,
5988 SourceLocation Loc
) {
5990 assert(V
->isLValue() && "V of 'omp atomic read' is not lvalue");
5991 assert(X
->isLValue() && "X of 'omp atomic read' is not lvalue");
5992 LValue XLValue
= CGF
.EmitLValue(X
);
5993 LValue VLValue
= CGF
.EmitLValue(V
);
5994 RValue Res
= emitSimpleAtomicLoad(CGF
, AO
, XLValue
, Loc
);
5995 // OpenMP, 2.17.7, atomic Construct
5996 // If the read or capture clause is specified and the acquire, acq_rel, or
5997 // seq_cst clause is specified then the strong flush on exit from the atomic
5998 // operation is also an acquire flush.
6000 case llvm::AtomicOrdering::Acquire
:
6001 case llvm::AtomicOrdering::AcquireRelease
:
6002 case llvm::AtomicOrdering::SequentiallyConsistent
:
6003 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6004 llvm::AtomicOrdering::Acquire
);
6006 case llvm::AtomicOrdering::Monotonic
:
6007 case llvm::AtomicOrdering::Release
:
6009 case llvm::AtomicOrdering::NotAtomic
:
6010 case llvm::AtomicOrdering::Unordered
:
6011 llvm_unreachable("Unexpected ordering.");
6013 CGF
.emitOMPSimpleStore(VLValue
, Res
, X
->getType().getNonReferenceType(), Loc
);
6014 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, V
);
6017 static void emitOMPAtomicWriteExpr(CodeGenFunction
&CGF
,
6018 llvm::AtomicOrdering AO
, const Expr
*X
,
6019 const Expr
*E
, SourceLocation Loc
) {
6021 assert(X
->isLValue() && "X of 'omp atomic write' is not lvalue");
6022 emitSimpleAtomicStore(CGF
, AO
, CGF
.EmitLValue(X
), CGF
.EmitAnyExpr(E
));
6023 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6024 // OpenMP, 2.17.7, atomic Construct
6025 // If the write, update, or capture clause is specified and the release,
6026 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6027 // the atomic operation is also a release flush.
6029 case llvm::AtomicOrdering::Release
:
6030 case llvm::AtomicOrdering::AcquireRelease
:
6031 case llvm::AtomicOrdering::SequentiallyConsistent
:
6032 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6033 llvm::AtomicOrdering::Release
);
6035 case llvm::AtomicOrdering::Acquire
:
6036 case llvm::AtomicOrdering::Monotonic
:
6038 case llvm::AtomicOrdering::NotAtomic
:
6039 case llvm::AtomicOrdering::Unordered
:
6040 llvm_unreachable("Unexpected ordering.");
6044 static std::pair
<bool, RValue
> emitOMPAtomicRMW(CodeGenFunction
&CGF
, LValue X
,
6046 BinaryOperatorKind BO
,
6047 llvm::AtomicOrdering AO
,
6048 bool IsXLHSInRHSPart
) {
6049 ASTContext
&Context
= CGF
.getContext();
6050 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6051 // expression is simple and atomic is allowed for the given type for the
6053 if (BO
== BO_Comma
|| !Update
.isScalar() || !X
.isSimple() ||
6054 (!isa
<llvm::ConstantInt
>(Update
.getScalarVal()) &&
6055 (Update
.getScalarVal()->getType() !=
6056 X
.getAddress(CGF
).getElementType())) ||
6057 !Context
.getTargetInfo().hasBuiltinAtomic(
6058 Context
.getTypeSize(X
.getType()), Context
.toBits(X
.getAlignment())))
6059 return std::make_pair(false, RValue::get(nullptr));
6061 auto &&CheckAtomicSupport
= [&CGF
](llvm::Type
*T
, BinaryOperatorKind BO
) {
6062 if (T
->isIntegerTy())
6065 if (T
->isFloatingPointTy() && (BO
== BO_Add
|| BO
== BO_Sub
))
6066 return llvm::isPowerOf2_64(CGF
.CGM
.getDataLayout().getTypeStoreSize(T
));
6071 if (!CheckAtomicSupport(Update
.getScalarVal()->getType(), BO
) ||
6072 !CheckAtomicSupport(X
.getAddress(CGF
).getElementType(), BO
))
6073 return std::make_pair(false, RValue::get(nullptr));
6075 bool IsInteger
= X
.getAddress(CGF
).getElementType()->isIntegerTy();
6076 llvm::AtomicRMWInst::BinOp RMWOp
;
6079 RMWOp
= IsInteger
? llvm::AtomicRMWInst::Add
: llvm::AtomicRMWInst::FAdd
;
6082 if (!IsXLHSInRHSPart
)
6083 return std::make_pair(false, RValue::get(nullptr));
6084 RMWOp
= IsInteger
? llvm::AtomicRMWInst::Sub
: llvm::AtomicRMWInst::FSub
;
6087 RMWOp
= llvm::AtomicRMWInst::And
;
6090 RMWOp
= llvm::AtomicRMWInst::Or
;
6093 RMWOp
= llvm::AtomicRMWInst::Xor
;
6097 RMWOp
= X
.getType()->hasSignedIntegerRepresentation()
6098 ? (IsXLHSInRHSPart
? llvm::AtomicRMWInst::Min
6099 : llvm::AtomicRMWInst::Max
)
6100 : (IsXLHSInRHSPart
? llvm::AtomicRMWInst::UMin
6101 : llvm::AtomicRMWInst::UMax
);
6103 RMWOp
= IsXLHSInRHSPart
? llvm::AtomicRMWInst::FMin
6104 : llvm::AtomicRMWInst::FMax
;
6108 RMWOp
= X
.getType()->hasSignedIntegerRepresentation()
6109 ? (IsXLHSInRHSPart
? llvm::AtomicRMWInst::Max
6110 : llvm::AtomicRMWInst::Min
)
6111 : (IsXLHSInRHSPart
? llvm::AtomicRMWInst::UMax
6112 : llvm::AtomicRMWInst::UMin
);
6114 RMWOp
= IsXLHSInRHSPart
? llvm::AtomicRMWInst::FMax
6115 : llvm::AtomicRMWInst::FMin
;
6118 RMWOp
= llvm::AtomicRMWInst::Xchg
;
6127 return std::make_pair(false, RValue::get(nullptr));
6146 llvm_unreachable("Unsupported atomic update operation");
6148 llvm::Value
*UpdateVal
= Update
.getScalarVal();
6149 if (auto *IC
= dyn_cast
<llvm::ConstantInt
>(UpdateVal
)) {
6151 UpdateVal
= CGF
.Builder
.CreateIntCast(
6152 IC
, X
.getAddress(CGF
).getElementType(),
6153 X
.getType()->hasSignedIntegerRepresentation());
6155 UpdateVal
= CGF
.Builder
.CreateCast(llvm::Instruction::CastOps::UIToFP
, IC
,
6156 X
.getAddress(CGF
).getElementType());
6159 CGF
.Builder
.CreateAtomicRMW(RMWOp
, X
.getPointer(CGF
), UpdateVal
, AO
);
6160 return std::make_pair(true, RValue::get(Res
));
6163 std::pair
<bool, RValue
> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6164 LValue X
, RValue E
, BinaryOperatorKind BO
, bool IsXLHSInRHSPart
,
6165 llvm::AtomicOrdering AO
, SourceLocation Loc
,
6166 const llvm::function_ref
<RValue(RValue
)> CommonGen
) {
6167 // Update expressions are allowed to have the following forms:
6168 // x binop= expr; -> xrval + expr;
6169 // x++, ++x -> xrval + 1;
6170 // x--, --x -> xrval - 1;
6171 // x = x binop expr; -> xrval binop expr
6172 // x = expr Op x; - > expr binop xrval;
6173 auto Res
= emitOMPAtomicRMW(*this, X
, E
, BO
, AO
, IsXLHSInRHSPart
);
6175 if (X
.isGlobalReg()) {
6176 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6178 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X
, Loc
)), X
);
6180 // Perform compare-and-swap procedure.
6181 EmitAtomicUpdate(X
, AO
, CommonGen
, X
.getType().isVolatileQualified());
6187 static void emitOMPAtomicUpdateExpr(CodeGenFunction
&CGF
,
6188 llvm::AtomicOrdering AO
, const Expr
*X
,
6189 const Expr
*E
, const Expr
*UE
,
6190 bool IsXLHSInRHSPart
, SourceLocation Loc
) {
6191 assert(isa
<BinaryOperator
>(UE
->IgnoreImpCasts()) &&
6192 "Update expr in 'atomic update' must be a binary operator.");
6193 const auto *BOUE
= cast
<BinaryOperator
>(UE
->IgnoreImpCasts());
6194 // Update expressions are allowed to have the following forms:
6195 // x binop= expr; -> xrval + expr;
6196 // x++, ++x -> xrval + 1;
6197 // x--, --x -> xrval - 1;
6198 // x = x binop expr; -> xrval binop expr
6199 // x = expr Op x; - > expr binop xrval;
6200 assert(X
->isLValue() && "X of 'omp atomic update' is not lvalue");
6201 LValue XLValue
= CGF
.EmitLValue(X
);
6202 RValue ExprRValue
= CGF
.EmitAnyExpr(E
);
6203 const auto *LHS
= cast
<OpaqueValueExpr
>(BOUE
->getLHS()->IgnoreImpCasts());
6204 const auto *RHS
= cast
<OpaqueValueExpr
>(BOUE
->getRHS()->IgnoreImpCasts());
6205 const OpaqueValueExpr
*XRValExpr
= IsXLHSInRHSPart
? LHS
: RHS
;
6206 const OpaqueValueExpr
*ERValExpr
= IsXLHSInRHSPart
? RHS
: LHS
;
6207 auto &&Gen
= [&CGF
, UE
, ExprRValue
, XRValExpr
, ERValExpr
](RValue XRValue
) {
6208 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6209 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, XRValue
);
6210 return CGF
.EmitAnyExpr(UE
);
6212 (void)CGF
.EmitOMPAtomicSimpleUpdateExpr(
6213 XLValue
, ExprRValue
, BOUE
->getOpcode(), IsXLHSInRHSPart
, AO
, Loc
, Gen
);
6214 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6215 // OpenMP, 2.17.7, atomic Construct
6216 // If the write, update, or capture clause is specified and the release,
6217 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6218 // the atomic operation is also a release flush.
6220 case llvm::AtomicOrdering::Release
:
6221 case llvm::AtomicOrdering::AcquireRelease
:
6222 case llvm::AtomicOrdering::SequentiallyConsistent
:
6223 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6224 llvm::AtomicOrdering::Release
);
6226 case llvm::AtomicOrdering::Acquire
:
6227 case llvm::AtomicOrdering::Monotonic
:
6229 case llvm::AtomicOrdering::NotAtomic
:
6230 case llvm::AtomicOrdering::Unordered
:
6231 llvm_unreachable("Unexpected ordering.");
6235 static RValue
convertToType(CodeGenFunction
&CGF
, RValue Value
,
6236 QualType SourceType
, QualType ResType
,
6237 SourceLocation Loc
) {
6238 switch (CGF
.getEvaluationKind(ResType
)) {
6241 convertToScalarValue(CGF
, Value
, SourceType
, ResType
, Loc
));
6243 auto Res
= convertToComplexValue(CGF
, Value
, SourceType
, ResType
, Loc
);
6244 return RValue::getComplex(Res
.first
, Res
.second
);
6249 llvm_unreachable("Must be a scalar or complex.");
6252 static void emitOMPAtomicCaptureExpr(CodeGenFunction
&CGF
,
6253 llvm::AtomicOrdering AO
,
6254 bool IsPostfixUpdate
, const Expr
*V
,
6255 const Expr
*X
, const Expr
*E
,
6256 const Expr
*UE
, bool IsXLHSInRHSPart
,
6257 SourceLocation Loc
) {
6258 assert(X
->isLValue() && "X of 'omp atomic capture' is not lvalue");
6259 assert(V
->isLValue() && "V of 'omp atomic capture' is not lvalue");
6261 LValue VLValue
= CGF
.EmitLValue(V
);
6262 LValue XLValue
= CGF
.EmitLValue(X
);
6263 RValue ExprRValue
= CGF
.EmitAnyExpr(E
);
6264 QualType NewVValType
;
6266 // 'x' is updated with some additional value.
6267 assert(isa
<BinaryOperator
>(UE
->IgnoreImpCasts()) &&
6268 "Update expr in 'atomic capture' must be a binary operator.");
6269 const auto *BOUE
= cast
<BinaryOperator
>(UE
->IgnoreImpCasts());
6270 // Update expressions are allowed to have the following forms:
6271 // x binop= expr; -> xrval + expr;
6272 // x++, ++x -> xrval + 1;
6273 // x--, --x -> xrval - 1;
6274 // x = x binop expr; -> xrval binop expr
6275 // x = expr Op x; - > expr binop xrval;
6276 const auto *LHS
= cast
<OpaqueValueExpr
>(BOUE
->getLHS()->IgnoreImpCasts());
6277 const auto *RHS
= cast
<OpaqueValueExpr
>(BOUE
->getRHS()->IgnoreImpCasts());
6278 const OpaqueValueExpr
*XRValExpr
= IsXLHSInRHSPart
? LHS
: RHS
;
6279 NewVValType
= XRValExpr
->getType();
6280 const OpaqueValueExpr
*ERValExpr
= IsXLHSInRHSPart
? RHS
: LHS
;
6281 auto &&Gen
= [&CGF
, &NewVVal
, UE
, ExprRValue
, XRValExpr
, ERValExpr
,
6282 IsPostfixUpdate
](RValue XRValue
) {
6283 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6284 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, XRValue
);
6285 RValue Res
= CGF
.EmitAnyExpr(UE
);
6286 NewVVal
= IsPostfixUpdate
? XRValue
: Res
;
6289 auto Res
= CGF
.EmitOMPAtomicSimpleUpdateExpr(
6290 XLValue
, ExprRValue
, BOUE
->getOpcode(), IsXLHSInRHSPart
, AO
, Loc
, Gen
);
6291 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6293 // 'atomicrmw' instruction was generated.
6294 if (IsPostfixUpdate
) {
6295 // Use old value from 'atomicrmw'.
6296 NewVVal
= Res
.second
;
6298 // 'atomicrmw' does not provide new value, so evaluate it using old
6300 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6301 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, Res
.second
);
6302 NewVVal
= CGF
.EmitAnyExpr(UE
);
6306 // 'x' is simply rewritten with some 'expr'.
6307 NewVValType
= X
->getType().getNonReferenceType();
6308 ExprRValue
= convertToType(CGF
, ExprRValue
, E
->getType(),
6309 X
->getType().getNonReferenceType(), Loc
);
6310 auto &&Gen
= [&NewVVal
, ExprRValue
](RValue XRValue
) {
6314 // Try to perform atomicrmw xchg, otherwise simple exchange.
6315 auto Res
= CGF
.EmitOMPAtomicSimpleUpdateExpr(
6316 XLValue
, ExprRValue
, /*BO=*/BO_Assign
, /*IsXLHSInRHSPart=*/false, AO
,
6318 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6320 // 'atomicrmw' instruction was generated.
6321 NewVVal
= IsPostfixUpdate
? Res
.second
: ExprRValue
;
6324 // Emit post-update store to 'v' of old/new 'x' value.
6325 CGF
.emitOMPSimpleStore(VLValue
, NewVVal
, NewVValType
, Loc
);
6326 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, V
);
6327 // OpenMP 5.1 removes the required flush for capture clause.
6328 if (CGF
.CGM
.getLangOpts().OpenMP
< 51) {
6329 // OpenMP, 2.17.7, atomic Construct
6330 // If the write, update, or capture clause is specified and the release,
6331 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6332 // the atomic operation is also a release flush.
6333 // If the read or capture clause is specified and the acquire, acq_rel, or
6334 // seq_cst clause is specified then the strong flush on exit from the atomic
6335 // operation is also an acquire flush.
6337 case llvm::AtomicOrdering::Release
:
6338 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6339 llvm::AtomicOrdering::Release
);
6341 case llvm::AtomicOrdering::Acquire
:
6342 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6343 llvm::AtomicOrdering::Acquire
);
6345 case llvm::AtomicOrdering::AcquireRelease
:
6346 case llvm::AtomicOrdering::SequentiallyConsistent
:
6347 CGF
.CGM
.getOpenMPRuntime().emitFlush(
6348 CGF
, std::nullopt
, Loc
, llvm::AtomicOrdering::AcquireRelease
);
6350 case llvm::AtomicOrdering::Monotonic
:
6352 case llvm::AtomicOrdering::NotAtomic
:
6353 case llvm::AtomicOrdering::Unordered
:
6354 llvm_unreachable("Unexpected ordering.");
6359 static void emitOMPAtomicCompareExpr(CodeGenFunction
&CGF
,
6360 llvm::AtomicOrdering AO
, const Expr
*X
,
6361 const Expr
*V
, const Expr
*R
,
6362 const Expr
*E
, const Expr
*D
,
6363 const Expr
*CE
, bool IsXBinopExpr
,
6364 bool IsPostfixUpdate
, bool IsFailOnly
,
6365 SourceLocation Loc
) {
6366 llvm::OpenMPIRBuilder
&OMPBuilder
=
6367 CGF
.CGM
.getOpenMPRuntime().getOMPBuilder();
6369 OMPAtomicCompareOp Op
;
6370 assert(isa
<BinaryOperator
>(CE
) && "CE is not a BinaryOperator");
6371 switch (cast
<BinaryOperator
>(CE
)->getOpcode()) {
6373 Op
= OMPAtomicCompareOp::EQ
;
6376 Op
= OMPAtomicCompareOp::MIN
;
6379 Op
= OMPAtomicCompareOp::MAX
;
6382 llvm_unreachable("unsupported atomic compare binary operator");
6385 LValue XLVal
= CGF
.EmitLValue(X
);
6386 Address XAddr
= XLVal
.getAddress(CGF
);
6388 auto EmitRValueWithCastIfNeeded
= [&CGF
, Loc
](const Expr
*X
, const Expr
*E
) {
6389 if (X
->getType() == E
->getType())
6390 return CGF
.EmitScalarExpr(E
);
6391 const Expr
*NewE
= E
->IgnoreImplicitAsWritten();
6392 llvm::Value
*V
= CGF
.EmitScalarExpr(NewE
);
6393 if (NewE
->getType() == X
->getType())
6395 return CGF
.EmitScalarConversion(V
, NewE
->getType(), X
->getType(), Loc
);
6398 llvm::Value
*EVal
= EmitRValueWithCastIfNeeded(X
, E
);
6399 llvm::Value
*DVal
= D
? EmitRValueWithCastIfNeeded(X
, D
) : nullptr;
6400 if (auto *CI
= dyn_cast
<llvm::ConstantInt
>(EVal
))
6401 EVal
= CGF
.Builder
.CreateIntCast(
6402 CI
, XLVal
.getAddress(CGF
).getElementType(),
6403 E
->getType()->hasSignedIntegerRepresentation());
6405 if (auto *CI
= dyn_cast
<llvm::ConstantInt
>(DVal
))
6406 DVal
= CGF
.Builder
.CreateIntCast(
6407 CI
, XLVal
.getAddress(CGF
).getElementType(),
6408 D
->getType()->hasSignedIntegerRepresentation());
6410 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal
{
6411 XAddr
.getPointer(), XAddr
.getElementType(),
6412 X
->getType()->hasSignedIntegerRepresentation(),
6413 X
->getType().isVolatileQualified()};
6414 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal
, ROpVal
;
6416 LValue LV
= CGF
.EmitLValue(V
);
6417 Address Addr
= LV
.getAddress(CGF
);
6418 VOpVal
= {Addr
.getPointer(), Addr
.getElementType(),
6419 V
->getType()->hasSignedIntegerRepresentation(),
6420 V
->getType().isVolatileQualified()};
6423 LValue LV
= CGF
.EmitLValue(R
);
6424 Address Addr
= LV
.getAddress(CGF
);
6425 ROpVal
= {Addr
.getPointer(), Addr
.getElementType(),
6426 R
->getType()->hasSignedIntegerRepresentation(),
6427 R
->getType().isVolatileQualified()};
6430 CGF
.Builder
.restoreIP(OMPBuilder
.createAtomicCompare(
6431 CGF
.Builder
, XOpVal
, VOpVal
, ROpVal
, EVal
, DVal
, AO
, Op
, IsXBinopExpr
,
6432 IsPostfixUpdate
, IsFailOnly
));
6435 static void emitOMPAtomicExpr(CodeGenFunction
&CGF
, OpenMPClauseKind Kind
,
6436 llvm::AtomicOrdering AO
, bool IsPostfixUpdate
,
6437 const Expr
*X
, const Expr
*V
, const Expr
*R
,
6438 const Expr
*E
, const Expr
*UE
, const Expr
*D
,
6439 const Expr
*CE
, bool IsXLHSInRHSPart
,
6440 bool IsFailOnly
, SourceLocation Loc
) {
6443 emitOMPAtomicReadExpr(CGF
, AO
, X
, V
, Loc
);
6446 emitOMPAtomicWriteExpr(CGF
, AO
, X
, E
, Loc
);
6450 emitOMPAtomicUpdateExpr(CGF
, AO
, X
, E
, UE
, IsXLHSInRHSPart
, Loc
);
6453 emitOMPAtomicCaptureExpr(CGF
, AO
, IsPostfixUpdate
, V
, X
, E
, UE
,
6454 IsXLHSInRHSPart
, Loc
);
6456 case OMPC_compare
: {
6457 emitOMPAtomicCompareExpr(CGF
, AO
, X
, V
, R
, E
, D
, CE
, IsXLHSInRHSPart
,
6458 IsPostfixUpdate
, IsFailOnly
, Loc
);
6462 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6466 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective
&S
) {
6467 llvm::AtomicOrdering AO
= llvm::AtomicOrdering::Monotonic
;
6468 bool MemOrderingSpecified
= false;
6469 if (S
.getSingleClause
<OMPSeqCstClause
>()) {
6470 AO
= llvm::AtomicOrdering::SequentiallyConsistent
;
6471 MemOrderingSpecified
= true;
6472 } else if (S
.getSingleClause
<OMPAcqRelClause
>()) {
6473 AO
= llvm::AtomicOrdering::AcquireRelease
;
6474 MemOrderingSpecified
= true;
6475 } else if (S
.getSingleClause
<OMPAcquireClause
>()) {
6476 AO
= llvm::AtomicOrdering::Acquire
;
6477 MemOrderingSpecified
= true;
6478 } else if (S
.getSingleClause
<OMPReleaseClause
>()) {
6479 AO
= llvm::AtomicOrdering::Release
;
6480 MemOrderingSpecified
= true;
6481 } else if (S
.getSingleClause
<OMPRelaxedClause
>()) {
6482 AO
= llvm::AtomicOrdering::Monotonic
;
6483 MemOrderingSpecified
= true;
6485 llvm::SmallSet
<OpenMPClauseKind
, 2> KindsEncountered
;
6486 OpenMPClauseKind Kind
= OMPC_unknown
;
6487 for (const OMPClause
*C
: S
.clauses()) {
6488 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
6490 OpenMPClauseKind K
= C
->getClauseKind();
6491 if (K
== OMPC_seq_cst
|| K
== OMPC_acq_rel
|| K
== OMPC_acquire
||
6492 K
== OMPC_release
|| K
== OMPC_relaxed
|| K
== OMPC_hint
)
6495 KindsEncountered
.insert(K
);
6497 // We just need to correct Kind here. No need to set a bool saying it is
6498 // actually compare capture because we can tell from whether V and R are
6500 if (KindsEncountered
.contains(OMPC_compare
) &&
6501 KindsEncountered
.contains(OMPC_capture
))
6502 Kind
= OMPC_compare
;
6503 if (!MemOrderingSpecified
) {
6504 llvm::AtomicOrdering DefaultOrder
=
6505 CGM
.getOpenMPRuntime().getDefaultMemoryOrdering();
6506 if (DefaultOrder
== llvm::AtomicOrdering::Monotonic
||
6507 DefaultOrder
== llvm::AtomicOrdering::SequentiallyConsistent
||
6508 (DefaultOrder
== llvm::AtomicOrdering::AcquireRelease
&&
6509 Kind
== OMPC_capture
)) {
6511 } else if (DefaultOrder
== llvm::AtomicOrdering::AcquireRelease
) {
6512 if (Kind
== OMPC_unknown
|| Kind
== OMPC_update
|| Kind
== OMPC_write
) {
6513 AO
= llvm::AtomicOrdering::Release
;
6514 } else if (Kind
== OMPC_read
) {
6515 assert(Kind
== OMPC_read
&& "Unexpected atomic kind.");
6516 AO
= llvm::AtomicOrdering::Acquire
;
6521 LexicalScope
Scope(*this, S
.getSourceRange());
6522 EmitStopPoint(S
.getAssociatedStmt());
6523 emitOMPAtomicExpr(*this, Kind
, AO
, S
.isPostfixUpdate(), S
.getX(), S
.getV(),
6524 S
.getR(), S
.getExpr(), S
.getUpdateExpr(), S
.getD(),
6525 S
.getCondExpr(), S
.isXLHSInRHSPart(), S
.isFailOnly(),
6529 static void emitCommonOMPTargetDirective(CodeGenFunction
&CGF
,
6530 const OMPExecutableDirective
&S
,
6531 const RegionCodeGenTy
&CodeGen
) {
6532 assert(isOpenMPTargetExecutionDirective(S
.getDirectiveKind()));
6533 CodeGenModule
&CGM
= CGF
.CGM
;
6535 // On device emit this construct as inlined code.
6536 if (CGM
.getLangOpts().OpenMPIsDevice
) {
6537 OMPLexicalScope
Scope(CGF
, S
, OMPD_target
);
6538 CGM
.getOpenMPRuntime().emitInlinedDirective(
6539 CGF
, OMPD_target
, [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6540 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
6545 auto LPCRegion
= CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF
, S
);
6546 llvm::Function
*Fn
= nullptr;
6547 llvm::Constant
*FnID
= nullptr;
6549 const Expr
*IfCond
= nullptr;
6550 // Check for the at most one if clause associated with the target region.
6551 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
6552 if (C
->getNameModifier() == OMPD_unknown
||
6553 C
->getNameModifier() == OMPD_target
) {
6554 IfCond
= C
->getCondition();
6559 // Check if we have any device clause associated with the directive.
6560 llvm::PointerIntPair
<const Expr
*, 2, OpenMPDeviceClauseModifier
> Device(
6561 nullptr, OMPC_DEVICE_unknown
);
6562 if (auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
6563 Device
.setPointerAndInt(C
->getDevice(), C
->getModifier());
6565 // Check if we have an if clause whose conditional always evaluates to false
6566 // or if we do not have any targets specified. If so the target region is not
6567 // an offload entry point.
6568 bool IsOffloadEntry
= true;
6571 if (CGF
.ConstantFoldsToSimpleInteger(IfCond
, Val
) && !Val
)
6572 IsOffloadEntry
= false;
6574 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
6575 IsOffloadEntry
= false;
6577 if (CGM
.getLangOpts().OpenMPOffloadMandatory
&& !IsOffloadEntry
) {
6578 unsigned DiagID
= CGM
.getDiags().getCustomDiagID(
6579 DiagnosticsEngine::Error
,
6580 "No offloading entry generated while offloading is mandatory.");
6581 CGM
.getDiags().Report(DiagID
);
6584 assert(CGF
.CurFuncDecl
&& "No parent declaration for target region!");
6585 StringRef ParentName
;
6586 // In case we have Ctors/Dtors we use the complete type variant to produce
6587 // the mangling of the device outlined kernel.
6588 if (const auto *D
= dyn_cast
<CXXConstructorDecl
>(CGF
.CurFuncDecl
))
6589 ParentName
= CGM
.getMangledName(GlobalDecl(D
, Ctor_Complete
));
6590 else if (const auto *D
= dyn_cast
<CXXDestructorDecl
>(CGF
.CurFuncDecl
))
6591 ParentName
= CGM
.getMangledName(GlobalDecl(D
, Dtor_Complete
));
6594 CGM
.getMangledName(GlobalDecl(cast
<FunctionDecl
>(CGF
.CurFuncDecl
)));
6596 // Emit target region as a standalone region.
6597 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(S
, ParentName
, Fn
, FnID
,
6598 IsOffloadEntry
, CodeGen
);
6599 OMPLexicalScope
Scope(CGF
, S
, OMPD_task
);
6600 auto &&SizeEmitter
=
6601 [IsOffloadEntry
](CodeGenFunction
&CGF
,
6602 const OMPLoopDirective
&D
) -> llvm::Value
* {
6603 if (IsOffloadEntry
) {
6604 OMPLoopScope(CGF
, D
);
6605 // Emit calculation of the iterations count.
6606 llvm::Value
*NumIterations
= CGF
.EmitScalarExpr(D
.getNumIterations());
6607 NumIterations
= CGF
.Builder
.CreateIntCast(NumIterations
, CGF
.Int64Ty
,
6608 /*isSigned=*/false);
6609 return NumIterations
;
6613 CGM
.getOpenMPRuntime().emitTargetCall(CGF
, S
, Fn
, FnID
, IfCond
, Device
,
6617 static void emitTargetRegion(CodeGenFunction
&CGF
, const OMPTargetDirective
&S
,
6618 PrePostActionTy
&Action
) {
6620 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6621 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
6622 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
6623 (void)PrivateScope
.Privatize();
6624 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
6625 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
6627 CGF
.EmitStmt(S
.getCapturedStmt(OMPD_target
)->getCapturedStmt());
6628 CGF
.EnsureInsertPoint();
6631 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule
&CGM
,
6632 StringRef ParentName
,
6633 const OMPTargetDirective
&S
) {
6634 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6635 emitTargetRegion(CGF
, S
, Action
);
6638 llvm::Constant
*Addr
;
6639 // Emit target region as a standalone region.
6640 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6641 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6642 assert(Fn
&& Addr
&& "Target device function emission failed.");
6645 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective
&S
) {
6646 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6647 emitTargetRegion(CGF
, S
, Action
);
6649 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
6652 static void emitCommonOMPTeamsDirective(CodeGenFunction
&CGF
,
6653 const OMPExecutableDirective
&S
,
6654 OpenMPDirectiveKind InnermostKind
,
6655 const RegionCodeGenTy
&CodeGen
) {
6656 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_teams
);
6657 llvm::Function
*OutlinedFn
=
6658 CGF
.CGM
.getOpenMPRuntime().emitTeamsOutlinedFunction(
6659 S
, *CS
->getCapturedDecl()->param_begin(), InnermostKind
, CodeGen
);
6661 const auto *NT
= S
.getSingleClause
<OMPNumTeamsClause
>();
6662 const auto *TL
= S
.getSingleClause
<OMPThreadLimitClause
>();
6664 const Expr
*NumTeams
= NT
? NT
->getNumTeams() : nullptr;
6665 const Expr
*ThreadLimit
= TL
? TL
->getThreadLimit() : nullptr;
6667 CGF
.CGM
.getOpenMPRuntime().emitNumTeamsClause(CGF
, NumTeams
, ThreadLimit
,
6671 OMPTeamsScope
Scope(CGF
, S
);
6672 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
6673 CGF
.GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
6674 CGF
.CGM
.getOpenMPRuntime().emitTeamsCall(CGF
, S
, S
.getBeginLoc(), OutlinedFn
,
6678 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective
&S
) {
6679 // Emit teams region as a standalone region.
6680 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6682 OMPPrivateScope
PrivateScope(CGF
);
6683 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
6684 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
6685 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6686 (void)PrivateScope
.Privatize();
6687 CGF
.EmitStmt(S
.getCapturedStmt(OMPD_teams
)->getCapturedStmt());
6688 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6690 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute
, CodeGen
);
6691 emitPostUpdateForReductionClause(*this, S
,
6692 [](CodeGenFunction
&) { return nullptr; });
6695 static void emitTargetTeamsRegion(CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6696 const OMPTargetTeamsDirective
&S
) {
6697 auto *CS
= S
.getCapturedStmt(OMPD_teams
);
6699 // Emit teams region as a standalone region.
6700 auto &&CodeGen
= [&S
, CS
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6702 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6703 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
6704 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
6705 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6706 (void)PrivateScope
.Privatize();
6707 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
6708 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
6709 CGF
.EmitStmt(CS
->getCapturedStmt());
6710 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6712 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_teams
, CodeGen
);
6713 emitPostUpdateForReductionClause(CGF
, S
,
6714 [](CodeGenFunction
&) { return nullptr; });
6717 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6718 CodeGenModule
&CGM
, StringRef ParentName
,
6719 const OMPTargetTeamsDirective
&S
) {
6720 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6721 emitTargetTeamsRegion(CGF
, Action
, S
);
6724 llvm::Constant
*Addr
;
6725 // Emit target region as a standalone region.
6726 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6727 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6728 assert(Fn
&& Addr
&& "Target device function emission failed.");
6731 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6732 const OMPTargetTeamsDirective
&S
) {
6733 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6734 emitTargetTeamsRegion(CGF
, Action
, S
);
6736 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
6740 emitTargetTeamsDistributeRegion(CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6741 const OMPTargetTeamsDistributeDirective
&S
) {
6743 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6744 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6747 // Emit teams region as a standalone region.
6748 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6749 PrePostActionTy
&Action
) {
6751 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6752 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6753 (void)PrivateScope
.Privatize();
6754 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6756 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6758 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute
, CodeGen
);
6759 emitPostUpdateForReductionClause(CGF
, S
,
6760 [](CodeGenFunction
&) { return nullptr; });
6763 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6764 CodeGenModule
&CGM
, StringRef ParentName
,
6765 const OMPTargetTeamsDistributeDirective
&S
) {
6766 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6767 emitTargetTeamsDistributeRegion(CGF
, Action
, S
);
6770 llvm::Constant
*Addr
;
6771 // Emit target region as a standalone region.
6772 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6773 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6774 assert(Fn
&& Addr
&& "Target device function emission failed.");
6777 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6778 const OMPTargetTeamsDistributeDirective
&S
) {
6779 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6780 emitTargetTeamsDistributeRegion(CGF
, Action
, S
);
6782 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
6785 static void emitTargetTeamsDistributeSimdRegion(
6786 CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6787 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6789 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6790 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6793 // Emit teams region as a standalone region.
6794 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6795 PrePostActionTy
&Action
) {
6797 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6798 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6799 (void)PrivateScope
.Privatize();
6800 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6802 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6804 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_simd
, CodeGen
);
6805 emitPostUpdateForReductionClause(CGF
, S
,
6806 [](CodeGenFunction
&) { return nullptr; });
6809 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6810 CodeGenModule
&CGM
, StringRef ParentName
,
6811 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6812 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6813 emitTargetTeamsDistributeSimdRegion(CGF
, Action
, S
);
6816 llvm::Constant
*Addr
;
6817 // Emit target region as a standalone region.
6818 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6819 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6820 assert(Fn
&& Addr
&& "Target device function emission failed.");
6823 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6824 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6825 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6826 emitTargetTeamsDistributeSimdRegion(CGF
, Action
, S
);
6828 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
6831 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6832 const OMPTeamsDistributeDirective
&S
) {
6834 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6835 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6838 // Emit teams region as a standalone region.
6839 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6840 PrePostActionTy
&Action
) {
6842 OMPPrivateScope
PrivateScope(CGF
);
6843 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6844 (void)PrivateScope
.Privatize();
6845 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6847 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6849 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute
, CodeGen
);
6850 emitPostUpdateForReductionClause(*this, S
,
6851 [](CodeGenFunction
&) { return nullptr; });
6854 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6855 const OMPTeamsDistributeSimdDirective
&S
) {
6856 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6857 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6860 // Emit teams region as a standalone region.
6861 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6862 PrePostActionTy
&Action
) {
6864 OMPPrivateScope
PrivateScope(CGF
);
6865 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6866 (void)PrivateScope
.Privatize();
6867 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_simd
,
6869 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6871 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_simd
, CodeGen
);
6872 emitPostUpdateForReductionClause(*this, S
,
6873 [](CodeGenFunction
&) { return nullptr; });
6876 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6877 const OMPTeamsDistributeParallelForDirective
&S
) {
6878 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6879 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
6883 // Emit teams region as a standalone region.
6884 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6885 PrePostActionTy
&Action
) {
6887 OMPPrivateScope
PrivateScope(CGF
);
6888 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6889 (void)PrivateScope
.Privatize();
6890 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6892 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6894 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_parallel_for
, CodeGen
);
6895 emitPostUpdateForReductionClause(*this, S
,
6896 [](CodeGenFunction
&) { return nullptr; });
6899 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
6900 const OMPTeamsDistributeParallelForSimdDirective
&S
) {
6901 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6902 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
6906 // Emit teams region as a standalone region.
6907 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6908 PrePostActionTy
&Action
) {
6910 OMPPrivateScope
PrivateScope(CGF
);
6911 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6912 (void)PrivateScope
.Privatize();
6913 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
6914 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
6915 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6917 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_parallel_for_simd
,
6919 emitPostUpdateForReductionClause(*this, S
,
6920 [](CodeGenFunction
&) { return nullptr; });
6923 void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective
&S
) {
6924 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
6925 llvm::Value
*Device
= nullptr;
6926 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
6927 Device
= EmitScalarExpr(C
->getDevice());
6929 llvm::Value
*NumDependences
= nullptr;
6930 llvm::Value
*DependenceAddress
= nullptr;
6931 if (const auto *DC
= S
.getSingleClause
<OMPDependClause
>()) {
6932 OMPTaskDataTy::DependData
Dependencies(DC
->getDependencyKind(),
6934 Dependencies
.DepExprs
.append(DC
->varlist_begin(), DC
->varlist_end());
6935 std::pair
<llvm::Value
*, Address
> DependencePair
=
6936 CGM
.getOpenMPRuntime().emitDependClause(*this, Dependencies
,
6938 NumDependences
= DependencePair
.first
;
6939 DependenceAddress
= Builder
.CreatePointerCast(
6940 DependencePair
.second
.getPointer(), CGM
.Int8PtrTy
);
6943 assert(!(S
.hasClausesOfKind
<OMPNowaitClause
>() &&
6944 !(S
.getSingleClause
<OMPInitClause
>() ||
6945 S
.getSingleClause
<OMPDestroyClause
>() ||
6946 S
.getSingleClause
<OMPUseClause
>())) &&
6947 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
6949 if (const auto *C
= S
.getSingleClause
<OMPInitClause
>()) {
6950 llvm::Value
*InteropvarPtr
=
6951 EmitLValue(C
->getInteropVar()).getPointer(*this);
6952 llvm::omp::OMPInteropType InteropType
= llvm::omp::OMPInteropType::Unknown
;
6953 if (C
->getIsTarget()) {
6954 InteropType
= llvm::omp::OMPInteropType::Target
;
6956 assert(C
->getIsTargetSync() && "Expected interop-type target/targetsync");
6957 InteropType
= llvm::omp::OMPInteropType::TargetSync
;
6959 OMPBuilder
.createOMPInteropInit(Builder
, InteropvarPtr
, InteropType
, Device
,
6960 NumDependences
, DependenceAddress
,
6961 S
.hasClausesOfKind
<OMPNowaitClause
>());
6962 } else if (const auto *C
= S
.getSingleClause
<OMPDestroyClause
>()) {
6963 llvm::Value
*InteropvarPtr
=
6964 EmitLValue(C
->getInteropVar()).getPointer(*this);
6965 OMPBuilder
.createOMPInteropDestroy(Builder
, InteropvarPtr
, Device
,
6966 NumDependences
, DependenceAddress
,
6967 S
.hasClausesOfKind
<OMPNowaitClause
>());
6968 } else if (const auto *C
= S
.getSingleClause
<OMPUseClause
>()) {
6969 llvm::Value
*InteropvarPtr
=
6970 EmitLValue(C
->getInteropVar()).getPointer(*this);
6971 OMPBuilder
.createOMPInteropUse(Builder
, InteropvarPtr
, Device
,
6972 NumDependences
, DependenceAddress
,
6973 S
.hasClausesOfKind
<OMPNowaitClause
>());
6977 static void emitTargetTeamsDistributeParallelForRegion(
6978 CodeGenFunction
&CGF
, const OMPTargetTeamsDistributeParallelForDirective
&S
,
6979 PrePostActionTy
&Action
) {
6981 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6982 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
6986 // Emit teams region as a standalone region.
6987 auto &&CodeGenTeams
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6988 PrePostActionTy
&Action
) {
6990 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6991 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6992 (void)PrivateScope
.Privatize();
6993 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
6994 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
6995 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6998 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_parallel_for
,
7000 emitPostUpdateForReductionClause(CGF
, S
,
7001 [](CodeGenFunction
&) { return nullptr; });
7004 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7005 CodeGenModule
&CGM
, StringRef ParentName
,
7006 const OMPTargetTeamsDistributeParallelForDirective
&S
) {
7007 // Emit SPMD target teams distribute parallel for region as a standalone
7009 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7010 emitTargetTeamsDistributeParallelForRegion(CGF
, S
, Action
);
7013 llvm::Constant
*Addr
;
7014 // Emit target region as a standalone region.
7015 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7016 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7017 assert(Fn
&& Addr
&& "Target device function emission failed.");
7020 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7021 const OMPTargetTeamsDistributeParallelForDirective
&S
) {
7022 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7023 emitTargetTeamsDistributeParallelForRegion(CGF
, S
, Action
);
7025 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
7028 static void emitTargetTeamsDistributeParallelForSimdRegion(
7029 CodeGenFunction
&CGF
,
7030 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
,
7031 PrePostActionTy
&Action
) {
7033 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7034 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
7038 // Emit teams region as a standalone region.
7039 auto &&CodeGenTeams
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
7040 PrePostActionTy
&Action
) {
7042 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
7043 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
7044 (void)PrivateScope
.Privatize();
7045 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
7046 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
7047 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
7050 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_parallel_for_simd
,
7052 emitPostUpdateForReductionClause(CGF
, S
,
7053 [](CodeGenFunction
&) { return nullptr; });
7056 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7057 CodeGenModule
&CGM
, StringRef ParentName
,
7058 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
) {
7059 // Emit SPMD target teams distribute parallel for simd region as a standalone
7061 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7062 emitTargetTeamsDistributeParallelForSimdRegion(CGF
, S
, Action
);
7065 llvm::Constant
*Addr
;
7066 // Emit target region as a standalone region.
7067 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7068 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7069 assert(Fn
&& Addr
&& "Target device function emission failed.");
7072 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7073 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
) {
7074 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7075 emitTargetTeamsDistributeParallelForSimdRegion(CGF
, S
, Action
);
7077 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
7080 void CodeGenFunction::EmitOMPCancellationPointDirective(
7081 const OMPCancellationPointDirective
&S
) {
7082 CGM
.getOpenMPRuntime().emitCancellationPointCall(*this, S
.getBeginLoc(),
7083 S
.getCancelRegion());
7086 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective
&S
) {
7087 const Expr
*IfCond
= nullptr;
7088 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
7089 if (C
->getNameModifier() == OMPD_unknown
||
7090 C
->getNameModifier() == OMPD_cancel
) {
7091 IfCond
= C
->getCondition();
7095 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
7096 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
7097 // TODO: This check is necessary as we only generate `omp parallel` through
7098 // the OpenMPIRBuilder for now.
7099 if (S
.getCancelRegion() == OMPD_parallel
||
7100 S
.getCancelRegion() == OMPD_sections
||
7101 S
.getCancelRegion() == OMPD_section
) {
7102 llvm::Value
*IfCondition
= nullptr;
7104 IfCondition
= EmitScalarExpr(IfCond
,
7105 /*IgnoreResultAssign=*/true);
7106 return Builder
.restoreIP(
7107 OMPBuilder
.createCancel(Builder
, IfCondition
, S
.getCancelRegion()));
7111 CGM
.getOpenMPRuntime().emitCancelCall(*this, S
.getBeginLoc(), IfCond
,
7112 S
.getCancelRegion());
7115 CodeGenFunction::JumpDest
7116 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind
) {
7117 if (Kind
== OMPD_parallel
|| Kind
== OMPD_task
||
7118 Kind
== OMPD_target_parallel
|| Kind
== OMPD_taskloop
||
7119 Kind
== OMPD_master_taskloop
|| Kind
== OMPD_parallel_master_taskloop
)
7121 assert(Kind
== OMPD_for
|| Kind
== OMPD_section
|| Kind
== OMPD_sections
||
7122 Kind
== OMPD_parallel_sections
|| Kind
== OMPD_parallel_for
||
7123 Kind
== OMPD_distribute_parallel_for
||
7124 Kind
== OMPD_target_parallel_for
||
7125 Kind
== OMPD_teams_distribute_parallel_for
||
7126 Kind
== OMPD_target_teams_distribute_parallel_for
);
7127 return OMPCancelStack
.getExitBlock();
7130 void CodeGenFunction::EmitOMPUseDevicePtrClause(
7131 const OMPUseDevicePtrClause
&C
, OMPPrivateScope
&PrivateScope
,
7132 const llvm::DenseMap
<const ValueDecl
*, Address
> &CaptureDeviceAddrMap
) {
7133 auto OrigVarIt
= C
.varlist_begin();
7134 auto InitIt
= C
.inits().begin();
7135 for (const Expr
*PvtVarIt
: C
.private_copies()) {
7136 const auto *OrigVD
=
7137 cast
<VarDecl
>(cast
<DeclRefExpr
>(*OrigVarIt
)->getDecl());
7138 const auto *InitVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*InitIt
)->getDecl());
7139 const auto *PvtVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(PvtVarIt
)->getDecl());
7141 // In order to identify the right initializer we need to match the
7142 // declaration used by the mapping logic. In some cases we may get
7143 // OMPCapturedExprDecl that refers to the original declaration.
7144 const ValueDecl
*MatchingVD
= OrigVD
;
7145 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(MatchingVD
)) {
7146 // OMPCapturedExprDecl are used to privative fields of the current
7148 const auto *ME
= cast
<MemberExpr
>(OED
->getInit());
7149 assert(isa
<CXXThisExpr
>(ME
->getBase()) &&
7150 "Base should be the current struct!");
7151 MatchingVD
= ME
->getMemberDecl();
7154 // If we don't have information about the current list item, move on to
7156 auto InitAddrIt
= CaptureDeviceAddrMap
.find(MatchingVD
);
7157 if (InitAddrIt
== CaptureDeviceAddrMap
.end())
7160 // Initialize the temporary initialization variable with the address
7161 // we get from the runtime library. We have to cast the source address
7162 // because it is always a void *. References are materialized in the
7163 // privatization scope, so the initialization here disregards the fact
7164 // the original variable is a reference.
7165 llvm::Type
*Ty
= ConvertTypeForMem(OrigVD
->getType().getNonReferenceType());
7166 Address InitAddr
= Builder
.CreateElementBitCast(InitAddrIt
->second
, Ty
);
7167 setAddrOfLocalVar(InitVD
, InitAddr
);
7169 // Emit private declaration, it will be initialized by the value we
7170 // declaration we just added to the local declarations map.
7173 // The initialization variables reached its purpose in the emission
7174 // of the previous declaration, so we don't need it anymore.
7175 LocalDeclMap
.erase(InitVD
);
7177 // Return the address of the private variable.
7179 PrivateScope
.addPrivate(OrigVD
, GetAddrOfLocalVar(PvtVD
));
7180 assert(IsRegistered
&& "firstprivate var already registered as private");
7181 // Silence the warning about unused variable.
7189 static const VarDecl
*getBaseDecl(const Expr
*Ref
) {
7190 const Expr
*Base
= Ref
->IgnoreParenImpCasts();
7191 while (const auto *OASE
= dyn_cast
<OMPArraySectionExpr
>(Base
))
7192 Base
= OASE
->getBase()->IgnoreParenImpCasts();
7193 while (const auto *ASE
= dyn_cast
<ArraySubscriptExpr
>(Base
))
7194 Base
= ASE
->getBase()->IgnoreParenImpCasts();
7195 return cast
<VarDecl
>(cast
<DeclRefExpr
>(Base
)->getDecl());
7198 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7199 const OMPUseDeviceAddrClause
&C
, OMPPrivateScope
&PrivateScope
,
7200 const llvm::DenseMap
<const ValueDecl
*, Address
> &CaptureDeviceAddrMap
) {
7201 llvm::SmallDenseSet
<CanonicalDeclPtr
<const Decl
>, 4> Processed
;
7202 for (const Expr
*Ref
: C
.varlists()) {
7203 const VarDecl
*OrigVD
= getBaseDecl(Ref
);
7204 if (!Processed
.insert(OrigVD
).second
)
7206 // In order to identify the right initializer we need to match the
7207 // declaration used by the mapping logic. In some cases we may get
7208 // OMPCapturedExprDecl that refers to the original declaration.
7209 const ValueDecl
*MatchingVD
= OrigVD
;
7210 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(MatchingVD
)) {
7211 // OMPCapturedExprDecl are used to privative fields of the current
7213 const auto *ME
= cast
<MemberExpr
>(OED
->getInit());
7214 assert(isa
<CXXThisExpr
>(ME
->getBase()) &&
7215 "Base should be the current struct!");
7216 MatchingVD
= ME
->getMemberDecl();
7219 // If we don't have information about the current list item, move on to
7221 auto InitAddrIt
= CaptureDeviceAddrMap
.find(MatchingVD
);
7222 if (InitAddrIt
== CaptureDeviceAddrMap
.end())
7225 Address PrivAddr
= InitAddrIt
->getSecond();
7226 // For declrefs and variable length array need to load the pointer for
7227 // correct mapping, since the pointer to the data was passed to the runtime.
7228 if (isa
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts()) ||
7229 MatchingVD
->getType()->isArrayType()) {
7230 QualType PtrTy
= getContext().getPointerType(
7231 OrigVD
->getType().getNonReferenceType());
7232 PrivAddr
= EmitLoadOfPointer(
7233 Builder
.CreateElementBitCast(PrivAddr
, ConvertTypeForMem(PtrTy
)),
7234 PtrTy
->castAs
<PointerType
>());
7237 (void)PrivateScope
.addPrivate(OrigVD
, PrivAddr
);
7241 // Generate the instructions for '#pragma omp target data' directive.
7242 void CodeGenFunction::EmitOMPTargetDataDirective(
7243 const OMPTargetDataDirective
&S
) {
7244 CGOpenMPRuntime::TargetDataInfo
Info(/*RequiresDevicePointerInfo=*/true,
7245 /*SeparateBeginEndCalls=*/true);
7247 // Create a pre/post action to signal the privatization of the device pointer.
7248 // This action can be replaced by the OpenMP runtime code generation to
7249 // deactivate privatization.
7250 bool PrivatizeDevicePointers
= false;
7251 class DevicePointerPrivActionTy
: public PrePostActionTy
{
7252 bool &PrivatizeDevicePointers
;
7255 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers
)
7256 : PrivatizeDevicePointers(PrivatizeDevicePointers
) {}
7257 void Enter(CodeGenFunction
&CGF
) override
{
7258 PrivatizeDevicePointers
= true;
7261 DevicePointerPrivActionTy
PrivAction(PrivatizeDevicePointers
);
7263 auto &&CodeGen
= [&S
, &Info
, &PrivatizeDevicePointers
](
7264 CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7265 auto &&InnermostCodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7266 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
7269 // Codegen that selects whether to generate the privatization code or not.
7270 auto &&PrivCodeGen
= [&S
, &Info
, &PrivatizeDevicePointers
,
7271 &InnermostCodeGen
](CodeGenFunction
&CGF
,
7272 PrePostActionTy
&Action
) {
7273 RegionCodeGenTy
RCG(InnermostCodeGen
);
7274 PrivatizeDevicePointers
= false;
7276 // Call the pre-action to change the status of PrivatizeDevicePointers if
7280 if (PrivatizeDevicePointers
) {
7281 OMPPrivateScope
PrivateScope(CGF
);
7282 // Emit all instances of the use_device_ptr clause.
7283 for (const auto *C
: S
.getClausesOfKind
<OMPUseDevicePtrClause
>())
7284 CGF
.EmitOMPUseDevicePtrClause(*C
, PrivateScope
,
7285 Info
.CaptureDeviceAddrMap
);
7286 for (const auto *C
: S
.getClausesOfKind
<OMPUseDeviceAddrClause
>())
7287 CGF
.EmitOMPUseDeviceAddrClause(*C
, PrivateScope
,
7288 Info
.CaptureDeviceAddrMap
);
7289 (void)PrivateScope
.Privatize();
7292 OMPLexicalScope
Scope(CGF
, S
, OMPD_unknown
);
7297 // Forward the provided action to the privatization codegen.
7298 RegionCodeGenTy
PrivRCG(PrivCodeGen
);
7299 PrivRCG
.setAction(Action
);
7301 // Notwithstanding the body of the region is emitted as inlined directive,
7302 // we don't use an inline scope as changes in the references inside the
7303 // region are expected to be visible outside, so we do not privative them.
7304 OMPLexicalScope
Scope(CGF
, S
);
7305 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_target_data
,
7309 RegionCodeGenTy
RCG(CodeGen
);
7311 // If we don't have target devices, don't bother emitting the data mapping
7313 if (CGM
.getLangOpts().OMPTargetTriples
.empty()) {
7318 // Check if we have any if clause associated with the directive.
7319 const Expr
*IfCond
= nullptr;
7320 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
7321 IfCond
= C
->getCondition();
7323 // Check if we have any device clause associated with the directive.
7324 const Expr
*Device
= nullptr;
7325 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
7326 Device
= C
->getDevice();
7328 // Set the action to signal privatization of device pointers.
7329 RCG
.setAction(PrivAction
);
7331 // Emit region code.
7332 CGM
.getOpenMPRuntime().emitTargetDataCalls(*this, S
, IfCond
, Device
, RCG
,
7336 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7337 const OMPTargetEnterDataDirective
&S
) {
7338 // If we don't have target devices, don't bother emitting the data mapping
7340 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
7343 // Check if we have any if clause associated with the directive.
7344 const Expr
*IfCond
= nullptr;
7345 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
7346 IfCond
= C
->getCondition();
7348 // Check if we have any device clause associated with the directive.
7349 const Expr
*Device
= nullptr;
7350 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
7351 Device
= C
->getDevice();
7353 OMPLexicalScope
Scope(*this, S
, OMPD_task
);
7354 CGM
.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S
, IfCond
, Device
);
7357 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7358 const OMPTargetExitDataDirective
&S
) {
7359 // If we don't have target devices, don't bother emitting the data mapping
7361 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
7364 // Check if we have any if clause associated with the directive.
7365 const Expr
*IfCond
= nullptr;
7366 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
7367 IfCond
= C
->getCondition();
7369 // Check if we have any device clause associated with the directive.
7370 const Expr
*Device
= nullptr;
7371 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
7372 Device
= C
->getDevice();
7374 OMPLexicalScope
Scope(*this, S
, OMPD_task
);
7375 CGM
.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S
, IfCond
, Device
);
7378 static void emitTargetParallelRegion(CodeGenFunction
&CGF
,
7379 const OMPTargetParallelDirective
&S
,
7380 PrePostActionTy
&Action
) {
7381 // Get the captured statement associated with the 'parallel' region.
7382 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_parallel
);
7384 auto &&CodeGen
= [&S
, CS
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7386 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
7387 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
7388 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
7389 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
7390 (void)PrivateScope
.Privatize();
7391 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
7392 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
7393 // TODO: Add support for clauses.
7394 CGF
.EmitStmt(CS
->getCapturedStmt());
7395 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
7397 emitCommonOMPParallelDirective(CGF
, S
, OMPD_parallel
, CodeGen
,
7398 emitEmptyBoundParameters
);
7399 emitPostUpdateForReductionClause(CGF
, S
,
7400 [](CodeGenFunction
&) { return nullptr; });
7403 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7404 CodeGenModule
&CGM
, StringRef ParentName
,
7405 const OMPTargetParallelDirective
&S
) {
7406 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7407 emitTargetParallelRegion(CGF
, S
, Action
);
7410 llvm::Constant
*Addr
;
7411 // Emit target region as a standalone region.
7412 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7413 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7414 assert(Fn
&& Addr
&& "Target device function emission failed.");
7417 void CodeGenFunction::EmitOMPTargetParallelDirective(
7418 const OMPTargetParallelDirective
&S
) {
7419 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7420 emitTargetParallelRegion(CGF
, S
, Action
);
7422 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
7425 static void emitTargetParallelForRegion(CodeGenFunction
&CGF
,
7426 const OMPTargetParallelForDirective
&S
,
7427 PrePostActionTy
&Action
) {
7429 // Emit directive as a combined directive that consists of two implicit
7430 // directives: 'parallel' with 'for' directive.
7431 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7433 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
7434 CGF
, OMPD_target_parallel_for
, S
.hasCancel());
7435 CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(), emitForLoopBounds
,
7436 emitDispatchForLoopBounds
);
7438 emitCommonOMPParallelDirective(CGF
, S
, OMPD_for
, CodeGen
,
7439 emitEmptyBoundParameters
);
7442 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7443 CodeGenModule
&CGM
, StringRef ParentName
,
7444 const OMPTargetParallelForDirective
&S
) {
7445 // Emit SPMD target parallel for region as a standalone region.
7446 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7447 emitTargetParallelForRegion(CGF
, S
, Action
);
7450 llvm::Constant
*Addr
;
7451 // Emit target region as a standalone region.
7452 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7453 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7454 assert(Fn
&& Addr
&& "Target device function emission failed.");
7457 void CodeGenFunction::EmitOMPTargetParallelForDirective(
7458 const OMPTargetParallelForDirective
&S
) {
7459 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7460 emitTargetParallelForRegion(CGF
, S
, Action
);
7462 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
7466 emitTargetParallelForSimdRegion(CodeGenFunction
&CGF
,
7467 const OMPTargetParallelForSimdDirective
&S
,
7468 PrePostActionTy
&Action
) {
7470 // Emit directive as a combined directive that consists of two implicit
7471 // directives: 'parallel' with 'for' directive.
7472 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7474 CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(), emitForLoopBounds
,
7475 emitDispatchForLoopBounds
);
7477 emitCommonOMPParallelDirective(CGF
, S
, OMPD_simd
, CodeGen
,
7478 emitEmptyBoundParameters
);
7481 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7482 CodeGenModule
&CGM
, StringRef ParentName
,
7483 const OMPTargetParallelForSimdDirective
&S
) {
7484 // Emit SPMD target parallel for region as a standalone region.
7485 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7486 emitTargetParallelForSimdRegion(CGF
, S
, Action
);
7489 llvm::Constant
*Addr
;
7490 // Emit target region as a standalone region.
7491 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7492 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7493 assert(Fn
&& Addr
&& "Target device function emission failed.");
7496 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7497 const OMPTargetParallelForSimdDirective
&S
) {
7498 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7499 emitTargetParallelForSimdRegion(CGF
, S
, Action
);
7501 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
7504 /// Emit a helper variable and return corresponding lvalue.
7505 static void mapParam(CodeGenFunction
&CGF
, const DeclRefExpr
*Helper
,
7506 const ImplicitParamDecl
*PVD
,
7507 CodeGenFunction::OMPPrivateScope
&Privates
) {
7508 const auto *VDecl
= cast
<VarDecl
>(Helper
->getDecl());
7509 Privates
.addPrivate(VDecl
, CGF
.GetAddrOfLocalVar(PVD
));
7512 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective
&S
) {
7513 assert(isOpenMPTaskLoopDirective(S
.getDirectiveKind()));
7514 // Emit outlined function for task construct.
7515 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_taskloop
);
7516 Address CapturedStruct
= Address::invalid();
7518 OMPLexicalScope
Scope(*this, S
, OMPD_taskloop
, /*EmitPreInitStmt=*/false);
7519 CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
7521 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
7522 const Expr
*IfCond
= nullptr;
7523 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
7524 if (C
->getNameModifier() == OMPD_unknown
||
7525 C
->getNameModifier() == OMPD_taskloop
) {
7526 IfCond
= C
->getCondition();
7532 // Check if taskloop must be emitted without taskgroup.
7533 Data
.Nogroup
= S
.getSingleClause
<OMPNogroupClause
>();
7534 // TODO: Check if we should emit tied or untied task.
7536 // Set scheduling for taskloop
7537 if (const auto *Clause
= S
.getSingleClause
<OMPGrainsizeClause
>()) {
7539 Data
.Schedule
.setInt(/*IntVal=*/false);
7540 Data
.Schedule
.setPointer(EmitScalarExpr(Clause
->getGrainsize()));
7541 } else if (const auto *Clause
= S
.getSingleClause
<OMPNumTasksClause
>()) {
7543 Data
.Schedule
.setInt(/*IntVal=*/true);
7544 Data
.Schedule
.setPointer(EmitScalarExpr(Clause
->getNumTasks()));
7547 auto &&BodyGen
= [CS
, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7549 // for (IV in 0..LastIteration) BODY;
7550 // <Final counter/linear vars updates>;
7554 // Emit: if (PreCond) - begin.
7555 // If the condition constant folds and can be elided, avoid emitting the
7558 llvm::BasicBlock
*ContBlock
= nullptr;
7559 OMPLoopScope
PreInitScope(CGF
, S
);
7560 if (CGF
.ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
7564 llvm::BasicBlock
*ThenBlock
= CGF
.createBasicBlock("taskloop.if.then");
7565 ContBlock
= CGF
.createBasicBlock("taskloop.if.end");
7566 emitPreCond(CGF
, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
7567 CGF
.getProfileCount(&S
));
7568 CGF
.EmitBlock(ThenBlock
);
7569 CGF
.incrementProfileCounter(&S
);
7572 (void)CGF
.EmitOMPLinearClauseInit(S
);
7574 OMPPrivateScope
LoopScope(CGF
);
7575 // Emit helper vars inits.
7576 enum { LowerBound
= 5, UpperBound
, Stride
, LastIter
};
7577 auto *I
= CS
->getCapturedDecl()->param_begin();
7578 auto *LBP
= std::next(I
, LowerBound
);
7579 auto *UBP
= std::next(I
, UpperBound
);
7580 auto *STP
= std::next(I
, Stride
);
7581 auto *LIP
= std::next(I
, LastIter
);
7582 mapParam(CGF
, cast
<DeclRefExpr
>(S
.getLowerBoundVariable()), *LBP
,
7584 mapParam(CGF
, cast
<DeclRefExpr
>(S
.getUpperBoundVariable()), *UBP
,
7586 mapParam(CGF
, cast
<DeclRefExpr
>(S
.getStrideVariable()), *STP
, LoopScope
);
7587 mapParam(CGF
, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()), *LIP
,
7589 CGF
.EmitOMPPrivateLoopCounters(S
, LoopScope
);
7590 CGF
.EmitOMPLinearClause(S
, LoopScope
);
7591 bool HasLastprivateClause
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
7592 (void)LoopScope
.Privatize();
7593 // Emit the loop iteration variable.
7594 const Expr
*IVExpr
= S
.getIterationVariable();
7595 const auto *IVDecl
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IVExpr
)->getDecl());
7596 CGF
.EmitVarDecl(*IVDecl
);
7597 CGF
.EmitIgnoredExpr(S
.getInit());
7599 // Emit the iterations count variable.
7600 // If it is not a variable, Sema decided to calculate iterations count on
7601 // each iteration (e.g., it is foldable into a constant).
7602 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
7603 CGF
.EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
7604 // Emit calculation of the iterations count.
7605 CGF
.EmitIgnoredExpr(S
.getCalcLastIteration());
7609 OMPLexicalScope
Scope(CGF
, S
, OMPD_taskloop
, /*EmitPreInitStmt=*/false);
7612 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7613 if (isOpenMPSimdDirective(S
.getDirectiveKind()))
7614 CGF
.EmitOMPSimdInit(S
);
7616 [&S
, &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7617 CGF
.EmitOMPInnerLoop(
7618 S
, LoopScope
.requiresCleanups(), S
.getCond(), S
.getInc(),
7619 [&S
](CodeGenFunction
&CGF
) {
7620 emitOMPLoopBodyWithStopPoint(CGF
, S
,
7621 CodeGenFunction::JumpDest());
7623 [](CodeGenFunction
&) {});
7626 // Emit: if (PreCond) - end.
7628 CGF
.EmitBranch(ContBlock
);
7629 CGF
.EmitBlock(ContBlock
, true);
7631 // Emit final copy of the lastprivate variables if IsLastIter != 0.
7632 if (HasLastprivateClause
) {
7633 CGF
.EmitOMPLastprivateClauseFinal(
7634 S
, isOpenMPSimdDirective(S
.getDirectiveKind()),
7635 CGF
.Builder
.CreateIsNotNull(CGF
.EmitLoadOfScalar(
7636 CGF
.GetAddrOfLocalVar(*LIP
), /*Volatile=*/false,
7637 (*LIP
)->getType(), S
.getBeginLoc())));
7639 LoopScope
.restoreMap();
7640 CGF
.EmitOMPLinearClauseFinal(S
, [LIP
, &S
](CodeGenFunction
&CGF
) {
7641 return CGF
.Builder
.CreateIsNotNull(
7642 CGF
.EmitLoadOfScalar(CGF
.GetAddrOfLocalVar(*LIP
), /*Volatile=*/false,
7643 (*LIP
)->getType(), S
.getBeginLoc()));
7646 auto &&TaskGen
= [&S
, SharedsTy
, CapturedStruct
,
7647 IfCond
](CodeGenFunction
&CGF
, llvm::Function
*OutlinedFn
,
7648 const OMPTaskDataTy
&Data
) {
7649 auto &&CodeGen
= [&S
, OutlinedFn
, SharedsTy
, CapturedStruct
, IfCond
,
7650 &Data
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7651 OMPLoopScope
PreInitScope(CGF
, S
);
7652 CGF
.CGM
.getOpenMPRuntime().emitTaskLoopCall(CGF
, S
.getBeginLoc(), S
,
7653 OutlinedFn
, SharedsTy
,
7654 CapturedStruct
, IfCond
, Data
);
7656 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_taskloop
,
7660 EmitOMPTaskBasedDirective(S
, OMPD_taskloop
, BodyGen
, TaskGen
, Data
);
7662 CGM
.getOpenMPRuntime().emitTaskgroupRegion(
7664 [&S
, &BodyGen
, &TaskGen
, &Data
](CodeGenFunction
&CGF
,
7665 PrePostActionTy
&Action
) {
7667 CGF
.EmitOMPTaskBasedDirective(S
, OMPD_taskloop
, BodyGen
, TaskGen
,
7674 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective
&S
) {
7676 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7677 EmitOMPTaskLoopBasedDirective(S
);
7680 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7681 const OMPTaskLoopSimdDirective
&S
) {
7683 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7684 OMPLexicalScope
Scope(*this, S
);
7685 EmitOMPTaskLoopBasedDirective(S
);
7688 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
7689 const OMPMasterTaskLoopDirective
&S
) {
7690 auto &&CodeGen
= [this, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7692 EmitOMPTaskLoopBasedDirective(S
);
7695 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7696 OMPLexicalScope
Scope(*this, S
, std::nullopt
, /*EmitPreInitStmt=*/false);
7697 CGM
.getOpenMPRuntime().emitMasterRegion(*this, CodeGen
, S
.getBeginLoc());
7700 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
7701 const OMPMasterTaskLoopSimdDirective
&S
) {
7702 auto &&CodeGen
= [this, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7704 EmitOMPTaskLoopBasedDirective(S
);
7707 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7708 OMPLexicalScope
Scope(*this, S
);
7709 CGM
.getOpenMPRuntime().emitMasterRegion(*this, CodeGen
, S
.getBeginLoc());
7712 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
7713 const OMPParallelMasterTaskLoopDirective
&S
) {
7714 auto &&CodeGen
= [this, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7715 auto &&TaskLoopCodeGen
= [&S
](CodeGenFunction
&CGF
,
7716 PrePostActionTy
&Action
) {
7718 CGF
.EmitOMPTaskLoopBasedDirective(S
);
7720 OMPLexicalScope
Scope(CGF
, S
, OMPD_parallel
, /*EmitPreInitStmt=*/false);
7721 CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, TaskLoopCodeGen
,
7725 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7726 emitCommonOMPParallelDirective(*this, S
, OMPD_master_taskloop
, CodeGen
,
7727 emitEmptyBoundParameters
);
7730 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7731 const OMPParallelMasterTaskLoopSimdDirective
&S
) {
7732 auto &&CodeGen
= [this, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7733 auto &&TaskLoopCodeGen
= [&S
](CodeGenFunction
&CGF
,
7734 PrePostActionTy
&Action
) {
7736 CGF
.EmitOMPTaskLoopBasedDirective(S
);
7738 OMPLexicalScope
Scope(CGF
, S
, OMPD_parallel
, /*EmitPreInitStmt=*/false);
7739 CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, TaskLoopCodeGen
,
7743 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7744 emitCommonOMPParallelDirective(*this, S
, OMPD_master_taskloop_simd
, CodeGen
,
7745 emitEmptyBoundParameters
);
7748 // Generate the instructions for '#pragma omp target update' directive.
7749 void CodeGenFunction::EmitOMPTargetUpdateDirective(
7750 const OMPTargetUpdateDirective
&S
) {
7751 // If we don't have target devices, don't bother emitting the data mapping
7753 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
7756 // Check if we have any if clause associated with the directive.
7757 const Expr
*IfCond
= nullptr;
7758 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
7759 IfCond
= C
->getCondition();
7761 // Check if we have any device clause associated with the directive.
7762 const Expr
*Device
= nullptr;
7763 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
7764 Device
= C
->getDevice();
7766 OMPLexicalScope
Scope(*this, S
, OMPD_task
);
7767 CGM
.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S
, IfCond
, Device
);
7770 void CodeGenFunction::EmitOMPGenericLoopDirective(
7771 const OMPGenericLoopDirective
&S
) {
7772 // Unimplemented, just inline the underlying statement for now.
7773 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7774 // Emit the loop iteration variable.
7776 cast
<CapturedStmt
>(S
.getAssociatedStmt())->getCapturedStmt();
7777 const auto *ForS
= dyn_cast
<ForStmt
>(CS
);
7778 if (ForS
&& !isa
<DeclStmt
>(ForS
->getInit())) {
7779 OMPPrivateScope
LoopScope(CGF
);
7780 CGF
.EmitOMPPrivateLoopCounters(S
, LoopScope
);
7781 (void)LoopScope
.Privatize();
7783 LoopScope
.restoreMap();
7788 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
7789 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop
, CodeGen
);
7792 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
7793 const OMPExecutableDirective
&D
) {
7794 if (const auto *SD
= dyn_cast
<OMPScanDirective
>(&D
)) {
7795 EmitOMPScanDirective(*SD
);
7798 if (!D
.hasAssociatedStmt() || !D
.getAssociatedStmt())
7800 auto &&CodeGen
= [&D
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7801 OMPPrivateScope
GlobalsScope(CGF
);
7802 if (isOpenMPTaskingDirective(D
.getDirectiveKind())) {
7803 // Capture global firstprivates to avoid crash.
7804 for (const auto *C
: D
.getClausesOfKind
<OMPFirstprivateClause
>()) {
7805 for (const Expr
*Ref
: C
->varlists()) {
7806 const auto *DRE
= cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
7809 const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl());
7810 if (!VD
|| VD
->hasLocalStorage())
7812 if (!CGF
.LocalDeclMap
.count(VD
)) {
7813 LValue GlobLVal
= CGF
.EmitLValue(Ref
);
7814 GlobalsScope
.addPrivate(VD
, GlobLVal
.getAddress(CGF
));
7819 if (isOpenMPSimdDirective(D
.getDirectiveKind())) {
7820 (void)GlobalsScope
.Privatize();
7821 ParentLoopDirectiveForScanRegion
ScanRegion(CGF
, D
);
7822 emitOMPSimdRegion(CGF
, cast
<OMPLoopDirective
>(D
), Action
);
7824 if (const auto *LD
= dyn_cast
<OMPLoopDirective
>(&D
)) {
7825 for (const Expr
*E
: LD
->counters()) {
7826 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
7827 if (!VD
->hasLocalStorage() && !CGF
.LocalDeclMap
.count(VD
)) {
7828 LValue GlobLVal
= CGF
.EmitLValue(E
);
7829 GlobalsScope
.addPrivate(VD
, GlobLVal
.getAddress(CGF
));
7831 if (isa
<OMPCapturedExprDecl
>(VD
)) {
7832 // Emit only those that were not explicitly referenced in clauses.
7833 if (!CGF
.LocalDeclMap
.count(VD
))
7834 CGF
.EmitVarDecl(*VD
);
7837 for (const auto *C
: D
.getClausesOfKind
<OMPOrderedClause
>()) {
7838 if (!C
->getNumForLoops())
7840 for (unsigned I
= LD
->getLoopsNumber(),
7841 E
= C
->getLoopNumIterations().size();
7843 if (const auto *VD
= dyn_cast
<OMPCapturedExprDecl
>(
7844 cast
<DeclRefExpr
>(C
->getLoopCounter(I
))->getDecl())) {
7845 // Emit only those that were not explicitly referenced in clauses.
7846 if (!CGF
.LocalDeclMap
.count(VD
))
7847 CGF
.EmitVarDecl(*VD
);
7852 (void)GlobalsScope
.Privatize();
7853 CGF
.EmitStmt(D
.getInnermostCapturedStmt()->getCapturedStmt());
7856 if (D
.getDirectiveKind() == OMPD_atomic
||
7857 D
.getDirectiveKind() == OMPD_critical
||
7858 D
.getDirectiveKind() == OMPD_section
||
7859 D
.getDirectiveKind() == OMPD_master
||
7860 D
.getDirectiveKind() == OMPD_masked
) {
7861 EmitStmt(D
.getAssociatedStmt());
7864 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D
);
7865 OMPSimdLexicalScope
Scope(*this, D
);
7866 CGM
.getOpenMPRuntime().emitInlinedDirective(
7868 isOpenMPSimdDirective(D
.getDirectiveKind()) ? OMPD_simd
7869 : D
.getDirectiveKind(),
7872 // Check for outer lastprivate conditional update.
7873 checkForLastprivateConditionalUpdate(*this, D
);