1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This contains code to emit OpenMP nodes as LLVM code.
11 //===----------------------------------------------------------------------===//
13 #include "CGCleanup.h"
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/BinaryFormat/Dwarf.h"
29 #include "llvm/Frontend/OpenMP/OMPConstants.h"
30 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/DebugInfoMetadata.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Metadata.h"
36 #include "llvm/Support/AtomicOrdering.h"
38 using namespace clang
;
39 using namespace CodeGen
;
40 using namespace llvm::omp
;
// Forward declaration only: getBaseDecl is called below for the list items of
// use_device_addr clauses; its definition is not part of this section.
42 static const VarDecl
*getBaseDecl(const Expr
*Ref
);
45 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
46 /// for captured expressions.
47 class OMPLexicalScope
: public CodeGenFunction::LexicalScope
{
48 void emitPreInitStmt(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
49 for (const auto *C
: S
.clauses()) {
50 if (const auto *CPI
= OMPClauseWithPreInit::get(C
)) {
51 if (const auto *PreInit
=
52 cast_or_null
<DeclStmt
>(CPI
->getPreInitStmt())) {
53 for (const auto *I
: PreInit
->decls()) {
54 if (!I
->hasAttr
<OMPCaptureNoInitAttr
>()) {
55 CGF
.EmitVarDecl(cast
<VarDecl
>(*I
));
57 CodeGenFunction::AutoVarEmission Emission
=
58 CGF
.EmitAutoVarAlloca(cast
<VarDecl
>(*I
));
59 CGF
.EmitAutoVarCleanups(Emission
);
66 CodeGenFunction::OMPPrivateScope InlinedShareds
;
68 static bool isCapturedVar(CodeGenFunction
&CGF
, const VarDecl
*VD
) {
69 return CGF
.LambdaCaptureFields
.lookup(VD
) ||
70 (CGF
.CapturedStmtInfo
&& CGF
.CapturedStmtInfo
->lookup(VD
)) ||
71 (CGF
.CurCodeDecl
&& isa
<BlockDecl
>(CGF
.CurCodeDecl
) &&
72 cast
<BlockDecl
>(CGF
.CurCodeDecl
)->capturesVariable(VD
));
77 CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
,
78 const std::optional
<OpenMPDirectiveKind
> CapturedRegion
= std::nullopt
,
79 const bool EmitPreInitStmt
= true)
80 : CodeGenFunction::LexicalScope(CGF
, S
.getSourceRange()),
83 emitPreInitStmt(CGF
, S
);
86 assert(S
.hasAssociatedStmt() &&
87 "Expected associated statement for inlined directive.");
88 const CapturedStmt
*CS
= S
.getCapturedStmt(*CapturedRegion
);
89 for (const auto &C
: CS
->captures()) {
90 if (C
.capturesVariable() || C
.capturesVariableByCopy()) {
91 auto *VD
= C
.getCapturedVar();
92 assert(VD
== VD
->getCanonicalDecl() &&
93 "Canonical decl must be captured.");
95 CGF
.getContext(), const_cast<VarDecl
*>(VD
),
96 isCapturedVar(CGF
, VD
) || (CGF
.CapturedStmtInfo
&&
97 InlinedShareds
.isGlobalVarCaptured(VD
)),
98 VD
->getType().getNonReferenceType(), VK_LValue
, C
.getLocation());
99 InlinedShareds
.addPrivate(VD
, CGF
.EmitLValue(&DRE
).getAddress(CGF
));
102 (void)InlinedShareds
.Privatize();
106 /// Lexical scope for OpenMP parallel construct, that handles correct codegen
107 /// for captured expressions.
108 class OMPParallelScope final
: public OMPLexicalScope
{
109 bool EmitPreInitStmt(const OMPExecutableDirective
&S
) {
110 OpenMPDirectiveKind Kind
= S
.getDirectiveKind();
111 return !(isOpenMPTargetExecutionDirective(Kind
) ||
112 isOpenMPLoopBoundSharingDirective(Kind
)) &&
113 isOpenMPParallelDirective(Kind
);
117 OMPParallelScope(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
)
118 : OMPLexicalScope(CGF
, S
, /*CapturedRegion=*/std::nullopt
,
119 EmitPreInitStmt(S
)) {}
122 /// Lexical scope for OpenMP teams construct, that handles correct codegen
123 /// for captured expressions.
124 class OMPTeamsScope final
: public OMPLexicalScope
{
125 bool EmitPreInitStmt(const OMPExecutableDirective
&S
) {
126 OpenMPDirectiveKind Kind
= S
.getDirectiveKind();
127 return !isOpenMPTargetExecutionDirective(Kind
) &&
128 isOpenMPTeamsDirective(Kind
);
132 OMPTeamsScope(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
)
133 : OMPLexicalScope(CGF
, S
, /*CapturedRegion=*/std::nullopt
,
134 EmitPreInitStmt(S
)) {}
137 /// Private scope for OpenMP loop-based directives, that supports capturing
138 /// of used expression from loop statement.
139 class OMPLoopScope
: public CodeGenFunction::RunCleanupsScope
{
140 void emitPreInitStmt(CodeGenFunction
&CGF
, const OMPLoopBasedDirective
&S
) {
141 const DeclStmt
*PreInits
;
142 CodeGenFunction::OMPMapVars PreCondVars
;
143 if (auto *LD
= dyn_cast
<OMPLoopDirective
>(&S
)) {
144 llvm::DenseSet
<const VarDecl
*> EmittedAsPrivate
;
145 for (const auto *E
: LD
->counters()) {
146 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
147 EmittedAsPrivate
.insert(VD
->getCanonicalDecl());
148 (void)PreCondVars
.setVarAddr(
149 CGF
, VD
, CGF
.CreateMemTemp(VD
->getType().getNonReferenceType()));
151 // Mark private vars as undefs.
152 for (const auto *C
: LD
->getClausesOfKind
<OMPPrivateClause
>()) {
153 for (const Expr
*IRef
: C
->varlists()) {
155 cast
<VarDecl
>(cast
<DeclRefExpr
>(IRef
)->getDecl());
156 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
157 QualType OrigVDTy
= OrigVD
->getType().getNonReferenceType();
158 (void)PreCondVars
.setVarAddr(
160 Address(llvm::UndefValue::get(CGF
.ConvertTypeForMem(
161 CGF
.getContext().getPointerType(OrigVDTy
))),
162 CGF
.ConvertTypeForMem(OrigVDTy
),
163 CGF
.getContext().getDeclAlign(OrigVD
)));
167 (void)PreCondVars
.apply(CGF
);
168 // Emit init, __range and __end variables for C++ range loops.
169 (void)OMPLoopBasedDirective::doForAllLoops(
170 LD
->getInnermostCapturedStmt()->getCapturedStmt(),
171 /*TryImperfectlyNestedLoops=*/true, LD
->getLoopsNumber(),
172 [&CGF
](unsigned Cnt
, const Stmt
*CurStmt
) {
173 if (const auto *CXXFor
= dyn_cast
<CXXForRangeStmt
>(CurStmt
)) {
174 if (const Stmt
*Init
= CXXFor
->getInit())
176 CGF
.EmitStmt(CXXFor
->getRangeStmt());
177 CGF
.EmitStmt(CXXFor
->getEndStmt());
181 PreInits
= cast_or_null
<DeclStmt
>(LD
->getPreInits());
182 } else if (const auto *Tile
= dyn_cast
<OMPTileDirective
>(&S
)) {
183 PreInits
= cast_or_null
<DeclStmt
>(Tile
->getPreInits());
184 } else if (const auto *Unroll
= dyn_cast
<OMPUnrollDirective
>(&S
)) {
185 PreInits
= cast_or_null
<DeclStmt
>(Unroll
->getPreInits());
187 llvm_unreachable("Unknown loop-based directive kind.");
190 for (const auto *I
: PreInits
->decls())
191 CGF
.EmitVarDecl(cast
<VarDecl
>(*I
));
193 PreCondVars
.restore(CGF
);
197 OMPLoopScope(CodeGenFunction
&CGF
, const OMPLoopBasedDirective
&S
)
198 : CodeGenFunction::RunCleanupsScope(CGF
) {
199 emitPreInitStmt(CGF
, S
);
203 class OMPSimdLexicalScope
: public CodeGenFunction::LexicalScope
{
204 CodeGenFunction::OMPPrivateScope InlinedShareds
;
206 static bool isCapturedVar(CodeGenFunction
&CGF
, const VarDecl
*VD
) {
207 return CGF
.LambdaCaptureFields
.lookup(VD
) ||
208 (CGF
.CapturedStmtInfo
&& CGF
.CapturedStmtInfo
->lookup(VD
)) ||
209 (CGF
.CurCodeDecl
&& isa
<BlockDecl
>(CGF
.CurCodeDecl
) &&
210 cast
<BlockDecl
>(CGF
.CurCodeDecl
)->capturesVariable(VD
));
214 OMPSimdLexicalScope(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
)
215 : CodeGenFunction::LexicalScope(CGF
, S
.getSourceRange()),
216 InlinedShareds(CGF
) {
217 for (const auto *C
: S
.clauses()) {
218 if (const auto *CPI
= OMPClauseWithPreInit::get(C
)) {
219 if (const auto *PreInit
=
220 cast_or_null
<DeclStmt
>(CPI
->getPreInitStmt())) {
221 for (const auto *I
: PreInit
->decls()) {
222 if (!I
->hasAttr
<OMPCaptureNoInitAttr
>()) {
223 CGF
.EmitVarDecl(cast
<VarDecl
>(*I
));
225 CodeGenFunction::AutoVarEmission Emission
=
226 CGF
.EmitAutoVarAlloca(cast
<VarDecl
>(*I
));
227 CGF
.EmitAutoVarCleanups(Emission
);
231 } else if (const auto *UDP
= dyn_cast
<OMPUseDevicePtrClause
>(C
)) {
232 for (const Expr
*E
: UDP
->varlists()) {
233 const Decl
*D
= cast
<DeclRefExpr
>(E
)->getDecl();
234 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(D
))
235 CGF
.EmitVarDecl(*OED
);
237 } else if (const auto *UDP
= dyn_cast
<OMPUseDeviceAddrClause
>(C
)) {
238 for (const Expr
*E
: UDP
->varlists()) {
239 const Decl
*D
= getBaseDecl(E
);
240 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(D
))
241 CGF
.EmitVarDecl(*OED
);
245 if (!isOpenMPSimdDirective(S
.getDirectiveKind()))
246 CGF
.EmitOMPPrivateClause(S
, InlinedShareds
);
247 if (const auto *TG
= dyn_cast
<OMPTaskgroupDirective
>(&S
)) {
248 if (const Expr
*E
= TG
->getReductionRef())
249 CGF
.EmitVarDecl(*cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl()));
251 // Temp copy arrays for inscan reductions should not be emitted as they are
252 // not used in simd only mode.
253 llvm::DenseSet
<CanonicalDeclPtr
<const Decl
>> CopyArrayTemps
;
254 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
255 if (C
->getModifier() != OMPC_REDUCTION_inscan
)
257 for (const Expr
*E
: C
->copy_array_temps())
258 CopyArrayTemps
.insert(cast
<DeclRefExpr
>(E
)->getDecl());
260 const auto *CS
= cast_or_null
<CapturedStmt
>(S
.getAssociatedStmt());
262 for (auto &C
: CS
->captures()) {
263 if (C
.capturesVariable() || C
.capturesVariableByCopy()) {
264 auto *VD
= C
.getCapturedVar();
265 if (CopyArrayTemps
.contains(VD
))
267 assert(VD
== VD
->getCanonicalDecl() &&
268 "Canonical decl must be captured.");
269 DeclRefExpr
DRE(CGF
.getContext(), const_cast<VarDecl
*>(VD
),
270 isCapturedVar(CGF
, VD
) ||
271 (CGF
.CapturedStmtInfo
&&
272 InlinedShareds
.isGlobalVarCaptured(VD
)),
273 VD
->getType().getNonReferenceType(), VK_LValue
,
275 InlinedShareds
.addPrivate(VD
, CGF
.EmitLValue(&DRE
).getAddress(CGF
));
278 CS
= dyn_cast
<CapturedStmt
>(CS
->getCapturedStmt());
280 (void)InlinedShareds
.Privatize();
// Forward declaration only: neither the definition nor a call of
// emitCommonOMPTargetDirective is part of this section of the file.
286 static void emitCommonOMPTargetDirective(CodeGenFunction
&CGF
,
287 const OMPExecutableDirective
&S
,
288 const RegionCodeGenTy
&CodeGen
);
290 LValue
CodeGenFunction::EmitOMPSharedLValue(const Expr
*E
) {
291 if (const auto *OrigDRE
= dyn_cast
<DeclRefExpr
>(E
)) {
292 if (const auto *OrigVD
= dyn_cast
<VarDecl
>(OrigDRE
->getDecl())) {
293 OrigVD
= OrigVD
->getCanonicalDecl();
295 LambdaCaptureFields
.lookup(OrigVD
) ||
296 (CapturedStmtInfo
&& CapturedStmtInfo
->lookup(OrigVD
)) ||
297 (CurCodeDecl
&& isa
<BlockDecl
>(CurCodeDecl
));
298 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
), IsCaptured
,
299 OrigDRE
->getType(), VK_LValue
, OrigDRE
->getExprLoc());
300 return EmitLValue(&DRE
);
303 return EmitLValue(E
);
306 llvm::Value
*CodeGenFunction::getTypeSize(QualType Ty
) {
307 ASTContext
&C
= getContext();
308 llvm::Value
*Size
= nullptr;
309 auto SizeInChars
= C
.getTypeSizeInChars(Ty
);
310 if (SizeInChars
.isZero()) {
311 // getTypeSizeInChars() returns 0 for a VLA.
312 while (const VariableArrayType
*VAT
= C
.getAsVariableArrayType(Ty
)) {
313 VlaSizePair VlaSize
= getVLASize(VAT
);
316 Size
? Builder
.CreateNUWMul(Size
, VlaSize
.NumElts
) : VlaSize
.NumElts
;
318 SizeInChars
= C
.getTypeSizeInChars(Ty
);
319 if (SizeInChars
.isZero())
320 return llvm::ConstantInt::get(SizeTy
, /*V=*/0);
321 return Builder
.CreateNUWMul(Size
, CGM
.getSize(SizeInChars
));
323 return CGM
.getSize(SizeInChars
);
326 void CodeGenFunction::GenerateOpenMPCapturedVars(
327 const CapturedStmt
&S
, SmallVectorImpl
<llvm::Value
*> &CapturedVars
) {
328 const RecordDecl
*RD
= S
.getCapturedRecordDecl();
329 auto CurField
= RD
->field_begin();
330 auto CurCap
= S
.captures().begin();
331 for (CapturedStmt::const_capture_init_iterator I
= S
.capture_init_begin(),
332 E
= S
.capture_init_end();
333 I
!= E
; ++I
, ++CurField
, ++CurCap
) {
334 if (CurField
->hasCapturedVLAType()) {
335 const VariableArrayType
*VAT
= CurField
->getCapturedVLAType();
336 llvm::Value
*Val
= VLASizeMap
[VAT
->getSizeExpr()];
337 CapturedVars
.push_back(Val
);
338 } else if (CurCap
->capturesThis()) {
339 CapturedVars
.push_back(CXXThisValue
);
340 } else if (CurCap
->capturesVariableByCopy()) {
341 llvm::Value
*CV
= EmitLoadOfScalar(EmitLValue(*I
), CurCap
->getLocation());
343 // If the field is not a pointer, we need to save the actual value
344 // and load it as a void pointer.
345 if (!CurField
->getType()->isAnyPointerType()) {
346 ASTContext
&Ctx
= getContext();
347 Address DstAddr
= CreateMemTemp(
348 Ctx
.getUIntPtrType(),
349 Twine(CurCap
->getCapturedVar()->getName(), ".casted"));
350 LValue DstLV
= MakeAddrLValue(DstAddr
, Ctx
.getUIntPtrType());
352 llvm::Value
*SrcAddrVal
= EmitScalarConversion(
353 DstAddr
.getPointer(), Ctx
.getPointerType(Ctx
.getUIntPtrType()),
354 Ctx
.getPointerType(CurField
->getType()), CurCap
->getLocation());
356 MakeNaturalAlignAddrLValue(SrcAddrVal
, CurField
->getType());
358 // Store the value using the source type pointer.
359 EmitStoreThroughLValue(RValue::get(CV
), SrcLV
);
361 // Load the value using the destination type pointer.
362 CV
= EmitLoadOfScalar(DstLV
, CurCap
->getLocation());
364 CapturedVars
.push_back(CV
);
366 assert(CurCap
->capturesVariable() && "Expected capture by reference.");
367 CapturedVars
.push_back(EmitLValue(*I
).getAddress(*this).getPointer());
372 static Address
castValueFromUintptr(CodeGenFunction
&CGF
, SourceLocation Loc
,
373 QualType DstType
, StringRef Name
,
375 ASTContext
&Ctx
= CGF
.getContext();
377 llvm::Value
*CastedPtr
= CGF
.EmitScalarConversion(
378 AddrLV
.getAddress(CGF
).getPointer(), Ctx
.getUIntPtrType(),
379 Ctx
.getPointerType(DstType
), Loc
);
381 CGF
.MakeNaturalAlignAddrLValue(CastedPtr
, DstType
).getAddress(CGF
);
385 static QualType
getCanonicalParamType(ASTContext
&C
, QualType T
) {
386 if (T
->isLValueReferenceType())
387 return C
.getLValueReferenceType(
388 getCanonicalParamType(C
, T
.getNonReferenceType()),
389 /*SpelledAsLValue=*/false);
390 if (T
->isPointerType())
391 return C
.getPointerType(getCanonicalParamType(C
, T
->getPointeeType()));
392 if (const ArrayType
*A
= T
->getAsArrayTypeUnsafe()) {
393 if (const auto *VLA
= dyn_cast
<VariableArrayType
>(A
))
394 return getCanonicalParamType(C
, VLA
->getElementType());
395 if (!A
->isVariablyModifiedType())
396 return C
.getCanonicalType(T
);
398 return C
.getCanonicalParamType(T
);
402 /// Contains required data for proper outlined function codegen.
403 struct FunctionOptions
{
404 /// Captured statement for which the function is generated.
405 const CapturedStmt
*S
= nullptr;
406 /// true if cast to/from UIntPtr is required for variables captured by
408 const bool UIntPtrCastRequired
= true;
409 /// true if only casted arguments must be registered as local args or VLA
411 const bool RegisterCastedArgsOnly
= false;
412 /// Name of the generated function.
413 const StringRef FunctionName
;
414 /// Location of the non-debug version of the outlined function.
416 explicit FunctionOptions(const CapturedStmt
*S
, bool UIntPtrCastRequired
,
417 bool RegisterCastedArgsOnly
, StringRef FunctionName
,
419 : S(S
), UIntPtrCastRequired(UIntPtrCastRequired
),
420 RegisterCastedArgsOnly(UIntPtrCastRequired
&& RegisterCastedArgsOnly
),
421 FunctionName(FunctionName
), Loc(Loc
) {}
425 static llvm::Function
*emitOutlinedFunctionPrologue(
426 CodeGenFunction
&CGF
, FunctionArgList
&Args
,
427 llvm::MapVector
<const Decl
*, std::pair
<const VarDecl
*, Address
>>
429 llvm::DenseMap
<const Decl
*, std::pair
<const Expr
*, llvm::Value
*>>
431 llvm::Value
*&CXXThisValue
, const FunctionOptions
&FO
) {
432 const CapturedDecl
*CD
= FO
.S
->getCapturedDecl();
433 const RecordDecl
*RD
= FO
.S
->getCapturedRecordDecl();
434 assert(CD
->hasBody() && "missing CapturedDecl body");
436 CXXThisValue
= nullptr;
437 // Build the argument list.
438 CodeGenModule
&CGM
= CGF
.CGM
;
439 ASTContext
&Ctx
= CGM
.getContext();
440 FunctionArgList TargetArgs
;
441 Args
.append(CD
->param_begin(),
442 std::next(CD
->param_begin(), CD
->getContextParamPosition()));
445 std::next(CD
->param_begin(), CD
->getContextParamPosition()));
446 auto I
= FO
.S
->captures().begin();
447 FunctionDecl
*DebugFunctionDecl
= nullptr;
448 if (!FO
.UIntPtrCastRequired
) {
449 FunctionProtoType::ExtProtoInfo EPI
;
450 QualType FunctionTy
= Ctx
.getFunctionType(Ctx
.VoidTy
, std::nullopt
, EPI
);
451 DebugFunctionDecl
= FunctionDecl::Create(
452 Ctx
, Ctx
.getTranslationUnitDecl(), FO
.S
->getBeginLoc(),
453 SourceLocation(), DeclarationName(), FunctionTy
,
454 Ctx
.getTrivialTypeSourceInfo(FunctionTy
), SC_Static
,
455 /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
456 /*hasWrittenPrototype=*/false);
458 for (const FieldDecl
*FD
: RD
->fields()) {
459 QualType ArgType
= FD
->getType();
460 IdentifierInfo
*II
= nullptr;
461 VarDecl
*CapVar
= nullptr;
463 // If this is a capture by copy and the type is not a pointer, the outlined
464 // function argument type should be uintptr and the value properly casted to
465 // uintptr. This is necessary given that the runtime library is only able to
466 // deal with pointers. We can pass in the same way the VLA type sizes to the
467 // outlined function.
468 if (FO
.UIntPtrCastRequired
&&
469 ((I
->capturesVariableByCopy() && !ArgType
->isAnyPointerType()) ||
470 I
->capturesVariableArrayType()))
471 ArgType
= Ctx
.getUIntPtrType();
473 if (I
->capturesVariable() || I
->capturesVariableByCopy()) {
474 CapVar
= I
->getCapturedVar();
475 II
= CapVar
->getIdentifier();
476 } else if (I
->capturesThis()) {
477 II
= &Ctx
.Idents
.get("this");
479 assert(I
->capturesVariableArrayType());
480 II
= &Ctx
.Idents
.get("vla");
482 if (ArgType
->isVariablyModifiedType())
483 ArgType
= getCanonicalParamType(Ctx
, ArgType
);
485 if (CapVar
&& (CapVar
->getTLSKind() != clang::VarDecl::TLS_None
)) {
486 Arg
= ImplicitParamDecl::Create(Ctx
, /*DC=*/nullptr, FD
->getLocation(),
488 ImplicitParamDecl::ThreadPrivateVar
);
489 } else if (DebugFunctionDecl
&& (CapVar
|| I
->capturesThis())) {
490 Arg
= ParmVarDecl::Create(
491 Ctx
, DebugFunctionDecl
,
492 CapVar
? CapVar
->getBeginLoc() : FD
->getBeginLoc(),
493 CapVar
? CapVar
->getLocation() : FD
->getLocation(), II
, ArgType
,
494 /*TInfo=*/nullptr, SC_None
, /*DefArg=*/nullptr);
496 Arg
= ImplicitParamDecl::Create(Ctx
, /*DC=*/nullptr, FD
->getLocation(),
497 II
, ArgType
, ImplicitParamDecl::Other
);
499 Args
.emplace_back(Arg
);
500 // Do not cast arguments if we emit function with non-original types.
501 TargetArgs
.emplace_back(
502 FO
.UIntPtrCastRequired
504 : CGM
.getOpenMPRuntime().translateParameter(FD
, Arg
));
507 Args
.append(std::next(CD
->param_begin(), CD
->getContextParamPosition() + 1),
510 std::next(CD
->param_begin(), CD
->getContextParamPosition() + 1),
513 // Create the function declaration.
514 const CGFunctionInfo
&FuncInfo
=
515 CGM
.getTypes().arrangeBuiltinFunctionDeclaration(Ctx
.VoidTy
, TargetArgs
);
516 llvm::FunctionType
*FuncLLVMTy
= CGM
.getTypes().GetFunctionType(FuncInfo
);
519 llvm::Function::Create(FuncLLVMTy
, llvm::GlobalValue::InternalLinkage
,
520 FO
.FunctionName
, &CGM
.getModule());
521 CGM
.SetInternalFunctionAttributes(CD
, F
, FuncInfo
);
523 F
->setDoesNotThrow();
524 F
->setDoesNotRecurse();
526 // Always inline the outlined function if optimizations are enabled.
527 if (CGM
.getCodeGenOpts().OptimizationLevel
!= 0) {
528 F
->removeFnAttr(llvm::Attribute::NoInline
);
529 F
->addFnAttr(llvm::Attribute::AlwaysInline
);
532 // Generate the function.
533 CGF
.StartFunction(CD
, Ctx
.VoidTy
, F
, FuncInfo
, TargetArgs
,
534 FO
.UIntPtrCastRequired
? FO
.Loc
: FO
.S
->getBeginLoc(),
535 FO
.UIntPtrCastRequired
? FO
.Loc
536 : CD
->getBody()->getBeginLoc());
537 unsigned Cnt
= CD
->getContextParamPosition();
538 I
= FO
.S
->captures().begin();
539 for (const FieldDecl
*FD
: RD
->fields()) {
540 // Do not map arguments if we emit function with non-original types.
541 Address
LocalAddr(Address::invalid());
542 if (!FO
.UIntPtrCastRequired
&& Args
[Cnt
] != TargetArgs
[Cnt
]) {
543 LocalAddr
= CGM
.getOpenMPRuntime().getParameterAddress(CGF
, Args
[Cnt
],
546 LocalAddr
= CGF
.GetAddrOfLocalVar(Args
[Cnt
]);
548 // If we are capturing a pointer by copy we don't need to do anything, just
549 // use the value that we get from the arguments.
550 if (I
->capturesVariableByCopy() && FD
->getType()->isAnyPointerType()) {
551 const VarDecl
*CurVD
= I
->getCapturedVar();
552 if (!FO
.RegisterCastedArgsOnly
)
553 LocalAddrs
.insert({Args
[Cnt
], {CurVD
, LocalAddr
}});
559 LValue ArgLVal
= CGF
.MakeAddrLValue(LocalAddr
, Args
[Cnt
]->getType(),
560 AlignmentSource::Decl
);
561 if (FD
->hasCapturedVLAType()) {
562 if (FO
.UIntPtrCastRequired
) {
563 ArgLVal
= CGF
.MakeAddrLValue(
564 castValueFromUintptr(CGF
, I
->getLocation(), FD
->getType(),
565 Args
[Cnt
]->getName(), ArgLVal
),
566 FD
->getType(), AlignmentSource::Decl
);
568 llvm::Value
*ExprArg
= CGF
.EmitLoadOfScalar(ArgLVal
, I
->getLocation());
569 const VariableArrayType
*VAT
= FD
->getCapturedVLAType();
570 VLASizes
.try_emplace(Args
[Cnt
], VAT
->getSizeExpr(), ExprArg
);
571 } else if (I
->capturesVariable()) {
572 const VarDecl
*Var
= I
->getCapturedVar();
573 QualType VarTy
= Var
->getType();
574 Address ArgAddr
= ArgLVal
.getAddress(CGF
);
575 if (ArgLVal
.getType()->isLValueReferenceType()) {
576 ArgAddr
= CGF
.EmitLoadOfReference(ArgLVal
);
577 } else if (!VarTy
->isVariablyModifiedType() || !VarTy
->isPointerType()) {
578 assert(ArgLVal
.getType()->isPointerType());
579 ArgAddr
= CGF
.EmitLoadOfPointer(
580 ArgAddr
, ArgLVal
.getType()->castAs
<PointerType
>());
582 if (!FO
.RegisterCastedArgsOnly
) {
584 {Args
[Cnt
], {Var
, ArgAddr
.withAlignment(Ctx
.getDeclAlign(Var
))}});
586 } else if (I
->capturesVariableByCopy()) {
587 assert(!FD
->getType()->isAnyPointerType() &&
588 "Not expecting a captured pointer.");
589 const VarDecl
*Var
= I
->getCapturedVar();
590 LocalAddrs
.insert({Args
[Cnt
],
591 {Var
, FO
.UIntPtrCastRequired
592 ? castValueFromUintptr(
593 CGF
, I
->getLocation(), FD
->getType(),
594 Args
[Cnt
]->getName(), ArgLVal
)
595 : ArgLVal
.getAddress(CGF
)}});
597 // If 'this' is captured, load it into CXXThisValue.
598 assert(I
->capturesThis());
599 CXXThisValue
= CGF
.EmitLoadOfScalar(ArgLVal
, I
->getLocation());
600 LocalAddrs
.insert({Args
[Cnt
], {nullptr, ArgLVal
.getAddress(CGF
)}});
610 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt
&S
,
611 SourceLocation Loc
) {
614 "CapturedStmtInfo should be set when generating the captured function");
615 const CapturedDecl
*CD
= S
.getCapturedDecl();
616 // Build the argument list.
617 bool NeedWrapperFunction
=
618 getDebugInfo() && CGM
.getCodeGenOpts().hasReducedDebugInfo();
619 FunctionArgList Args
;
620 llvm::MapVector
<const Decl
*, std::pair
<const VarDecl
*, Address
>> LocalAddrs
;
621 llvm::DenseMap
<const Decl
*, std::pair
<const Expr
*, llvm::Value
*>> VLASizes
;
622 SmallString
<256> Buffer
;
623 llvm::raw_svector_ostream
Out(Buffer
);
624 Out
<< CapturedStmtInfo
->getHelperName();
625 if (NeedWrapperFunction
)
627 FunctionOptions
FO(&S
, !NeedWrapperFunction
, /*RegisterCastedArgsOnly=*/false,
629 llvm::Function
*F
= emitOutlinedFunctionPrologue(*this, Args
, LocalAddrs
,
630 VLASizes
, CXXThisValue
, FO
);
631 CodeGenFunction::OMPPrivateScope
LocalScope(*this);
632 for (const auto &LocalAddrPair
: LocalAddrs
) {
633 if (LocalAddrPair
.second
.first
) {
634 LocalScope
.addPrivate(LocalAddrPair
.second
.first
,
635 LocalAddrPair
.second
.second
);
638 (void)LocalScope
.Privatize();
639 for (const auto &VLASizePair
: VLASizes
)
640 VLASizeMap
[VLASizePair
.second
.first
] = VLASizePair
.second
.second
;
641 PGO
.assignRegionCounters(GlobalDecl(CD
), F
);
642 CapturedStmtInfo
->EmitBody(*this, CD
->getBody());
643 (void)LocalScope
.ForceCleanup();
644 FinishFunction(CD
->getBodyRBrace());
645 if (!NeedWrapperFunction
)
648 FunctionOptions
WrapperFO(&S
, /*UIntPtrCastRequired=*/true,
649 /*RegisterCastedArgsOnly=*/true,
650 CapturedStmtInfo
->getHelperName(), Loc
);
651 CodeGenFunction
WrapperCGF(CGM
, /*suppressNewContext=*/true);
652 WrapperCGF
.CapturedStmtInfo
= CapturedStmtInfo
;
656 llvm::Function
*WrapperF
=
657 emitOutlinedFunctionPrologue(WrapperCGF
, Args
, LocalAddrs
, VLASizes
,
658 WrapperCGF
.CXXThisValue
, WrapperFO
);
659 llvm::SmallVector
<llvm::Value
*, 4> CallArgs
;
660 auto *PI
= F
->arg_begin();
661 for (const auto *Arg
: Args
) {
662 llvm::Value
*CallArg
;
663 auto I
= LocalAddrs
.find(Arg
);
664 if (I
!= LocalAddrs
.end()) {
665 LValue LV
= WrapperCGF
.MakeAddrLValue(
667 I
->second
.first
? I
->second
.first
->getType() : Arg
->getType(),
668 AlignmentSource::Decl
);
669 if (LV
.getType()->isAnyComplexType())
670 LV
.setAddress(WrapperCGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
671 LV
.getAddress(WrapperCGF
),
672 PI
->getType()->getPointerTo(
673 LV
.getAddress(WrapperCGF
).getAddressSpace()),
675 CallArg
= WrapperCGF
.EmitLoadOfScalar(LV
, S
.getBeginLoc());
677 auto EI
= VLASizes
.find(Arg
);
678 if (EI
!= VLASizes
.end()) {
679 CallArg
= EI
->second
.second
;
682 WrapperCGF
.MakeAddrLValue(WrapperCGF
.GetAddrOfLocalVar(Arg
),
683 Arg
->getType(), AlignmentSource::Decl
);
684 CallArg
= WrapperCGF
.EmitLoadOfScalar(LV
, S
.getBeginLoc());
687 CallArgs
.emplace_back(WrapperCGF
.EmitFromMemory(CallArg
, Arg
->getType()));
690 CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF
, Loc
, F
, CallArgs
);
691 WrapperCGF
.FinishFunction();
695 //===----------------------------------------------------------------------===//
696 // OpenMP Directive Emission
697 //===----------------------------------------------------------------------===//
698 void CodeGenFunction::EmitOMPAggregateAssign(
699 Address DestAddr
, Address SrcAddr
, QualType OriginalType
,
700 const llvm::function_ref
<void(Address
, Address
)> CopyGen
) {
701 // Perform element-by-element initialization.
704 // Drill down to the base element type on both arrays.
705 const ArrayType
*ArrayTy
= OriginalType
->getAsArrayTypeUnsafe();
706 llvm::Value
*NumElements
= emitArrayLength(ArrayTy
, ElementTy
, DestAddr
);
707 SrcAddr
= Builder
.CreateElementBitCast(SrcAddr
, DestAddr
.getElementType());
709 llvm::Value
*SrcBegin
= SrcAddr
.getPointer();
710 llvm::Value
*DestBegin
= DestAddr
.getPointer();
711 // Cast from pointer to array type to pointer to single element.
712 llvm::Value
*DestEnd
= Builder
.CreateInBoundsGEP(DestAddr
.getElementType(),
713 DestBegin
, NumElements
);
715 // The basic structure here is a while-do loop.
716 llvm::BasicBlock
*BodyBB
= createBasicBlock("omp.arraycpy.body");
717 llvm::BasicBlock
*DoneBB
= createBasicBlock("omp.arraycpy.done");
718 llvm::Value
*IsEmpty
=
719 Builder
.CreateICmpEQ(DestBegin
, DestEnd
, "omp.arraycpy.isempty");
720 Builder
.CreateCondBr(IsEmpty
, DoneBB
, BodyBB
);
722 // Enter the loop body, making that address the current address.
723 llvm::BasicBlock
*EntryBB
= Builder
.GetInsertBlock();
726 CharUnits ElementSize
= getContext().getTypeSizeInChars(ElementTy
);
728 llvm::PHINode
*SrcElementPHI
=
729 Builder
.CreatePHI(SrcBegin
->getType(), 2, "omp.arraycpy.srcElementPast");
730 SrcElementPHI
->addIncoming(SrcBegin
, EntryBB
);
731 Address SrcElementCurrent
=
732 Address(SrcElementPHI
, SrcAddr
.getElementType(),
733 SrcAddr
.getAlignment().alignmentOfArrayElement(ElementSize
));
735 llvm::PHINode
*DestElementPHI
= Builder
.CreatePHI(
736 DestBegin
->getType(), 2, "omp.arraycpy.destElementPast");
737 DestElementPHI
->addIncoming(DestBegin
, EntryBB
);
738 Address DestElementCurrent
=
739 Address(DestElementPHI
, DestAddr
.getElementType(),
740 DestAddr
.getAlignment().alignmentOfArrayElement(ElementSize
));
743 CopyGen(DestElementCurrent
, SrcElementCurrent
);
745 // Shift the address forward by one element.
746 llvm::Value
*DestElementNext
=
747 Builder
.CreateConstGEP1_32(DestAddr
.getElementType(), DestElementPHI
,
748 /*Idx0=*/1, "omp.arraycpy.dest.element");
749 llvm::Value
*SrcElementNext
=
750 Builder
.CreateConstGEP1_32(SrcAddr
.getElementType(), SrcElementPHI
,
751 /*Idx0=*/1, "omp.arraycpy.src.element");
752 // Check whether we've reached the end.
754 Builder
.CreateICmpEQ(DestElementNext
, DestEnd
, "omp.arraycpy.done");
755 Builder
.CreateCondBr(Done
, DoneBB
, BodyBB
);
756 DestElementPHI
->addIncoming(DestElementNext
, Builder
.GetInsertBlock());
757 SrcElementPHI
->addIncoming(SrcElementNext
, Builder
.GetInsertBlock());
760 EmitBlock(DoneBB
, /*IsFinished=*/true);
763 void CodeGenFunction::EmitOMPCopy(QualType OriginalType
, Address DestAddr
,
764 Address SrcAddr
, const VarDecl
*DestVD
,
765 const VarDecl
*SrcVD
, const Expr
*Copy
) {
766 if (OriginalType
->isArrayType()) {
767 const auto *BO
= dyn_cast
<BinaryOperator
>(Copy
);
768 if (BO
&& BO
->getOpcode() == BO_Assign
) {
769 // Perform simple memcpy for simple copying.
770 LValue Dest
= MakeAddrLValue(DestAddr
, OriginalType
);
771 LValue Src
= MakeAddrLValue(SrcAddr
, OriginalType
);
772 EmitAggregateAssign(Dest
, Src
, OriginalType
);
774 // For arrays with complex element types perform element by element
776 EmitOMPAggregateAssign(
777 DestAddr
, SrcAddr
, OriginalType
,
778 [this, Copy
, SrcVD
, DestVD
](Address DestElement
, Address SrcElement
) {
779 // Working with the single array element, so have to remap
780 // destination and source variables to corresponding array
782 CodeGenFunction::OMPPrivateScope
Remap(*this);
783 Remap
.addPrivate(DestVD
, DestElement
);
784 Remap
.addPrivate(SrcVD
, SrcElement
);
785 (void)Remap
.Privatize();
786 EmitIgnoredExpr(Copy
);
790 // Remap pseudo source variable to private copy.
791 CodeGenFunction::OMPPrivateScope
Remap(*this);
792 Remap
.addPrivate(SrcVD
, SrcAddr
);
793 Remap
.addPrivate(DestVD
, DestAddr
);
794 (void)Remap
.Privatize();
795 // Emit copying of the whole variable.
796 EmitIgnoredExpr(Copy
);
/// Emits the private copies for the variables named in the 'firstprivate'
/// clauses of \p D and registers them in \p PrivateScope.
///
/// Each original variable is handled once, keyed by its canonical declaration
/// (EmittedAsFirstprivate). Array-typed copies are initialized either with an
/// aggregate assignment or element by element through EmitOMPAggregateAssign;
/// for other types the temporary init variable is remapped to the address of
/// the original variable. Variables that also appear in a 'lastprivate'
/// clause are tracked, and the 'conditional' lastprivate kind gets a dedicated
/// variable from the OpenMP runtime.
///
/// \param D Directive whose 'firstprivate' and 'lastprivate' clauses are read.
/// \param PrivateScope Scope receiving the original -> private address mapping.
/// \returns FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty().
///
/// NOTE(review): the embedded listing numbers skip in places (e.g. 803,
/// 835-838, 845-848), so some statements of this function are not visible in
/// this excerpt; the comments here describe only what is shown.
800 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective
&D
,
801 OMPPrivateScope
&PrivateScope
) {
802 if (!HaveInsertPoint())
// Set only when compiling for the device and D is a target execution
// directive.
804 bool DeviceConstTarget
=
805 getLangOpts().OpenMPIsDevice
&&
806 isOpenMPTargetExecutionDirective(D
.getDirectiveKind());
807 bool FirstprivateIsLastprivate
= false;
// Collect the canonical declarations of all lastprivate variables; the mapped
// value type is OpenMPLastprivateModifier.
808 llvm::DenseMap
<const VarDecl
*, OpenMPLastprivateModifier
> Lastprivates
;
809 for (const auto *C
: D
.getClausesOfKind
<OMPLastprivateClause
>()) {
810 for (const auto *D
: C
->varlists())
811 Lastprivates
.try_emplace(
812 cast
<VarDecl
>(cast
<DeclRefExpr
>(D
)->getDecl())->getCanonicalDecl(),
815 llvm::DenseSet
<const VarDecl
*> EmittedAsFirstprivate
;
816 llvm::SmallVector
<OpenMPDirectiveKind
, 4> CaptureRegions
;
817 getOpenMPCaptureRegions(CaptureRegions
, D
.getDirectiveKind());
818 // Force emission of the firstprivate copy if the directive does not emit
819 // outlined function, like omp for, omp simd, omp distribute etc.
820 bool MustEmitFirstprivateCopy
=
821 CaptureRegions
.size() == 1 && CaptureRegions
.back() == OMPD_unknown
;
822 for (const auto *C
: D
.getClausesOfKind
<OMPFirstprivateClause
>()) {
823 const auto *IRef
= C
->varlist_begin();
824 const auto *InitsRef
= C
->inits().begin();
825 for (const Expr
*IInit
: C
->private_copies()) {
826 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
827 bool ThisFirstprivateIsLastprivate
=
828 Lastprivates
.count(OrigVD
->getCanonicalDecl()) > 0;
829 const FieldDecl
*FD
= CapturedStmtInfo
->lookup(OrigVD
);
830 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IInit
)->getDecl());
// Captured by value (non-reference field), not a lastprivate, no forced
// copy and no allocate attribute: only mark the variable as handled.
831 if (!MustEmitFirstprivateCopy
&& !ThisFirstprivateIsLastprivate
&& FD
&&
832 !FD
->getType()->isReferenceType() &&
833 (!VD
|| !VD
->hasAttr
<OMPAllocateDeclAttr
>())) {
834 EmittedAsFirstprivate
.insert(OrigVD
->getCanonicalDecl());
839 // Do not emit copy for firstprivate constant variables in target regions,
840 // captured by reference.
841 if (DeviceConstTarget
&& OrigVD
->getType().isConstant(getContext()) &&
842 FD
&& FD
->getType()->isReferenceType() &&
843 (!VD
|| !VD
->hasAttr
<OMPAllocateDeclAttr
>())) {
844 EmittedAsFirstprivate
.insert(OrigVD
->getCanonicalDecl());
849 FirstprivateIsLastprivate
=
850 FirstprivateIsLastprivate
|| ThisFirstprivateIsLastprivate
;
851 if (EmittedAsFirstprivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
853 cast
<VarDecl
>(cast
<DeclRefExpr
>(*InitsRef
)->getDecl());
// Build a reference to the original variable; it is flagged as referring
// to a capture when a captured field was found for it.
855 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
856 /*RefersToEnclosingVariableOrCapture=*/FD
!= nullptr,
857 (*IRef
)->getType(), VK_LValue
, (*IRef
)->getExprLoc());
860 // Check if the firstprivate variable is just a constant value.
861 ConstantEmission CE
= tryEmitAsConstant(&DRE
);
862 if (CE
&& !CE
.isReference()) {
863 // Constant value, no need to create a copy.
868 if (CE
&& CE
.isReference()) {
869 OriginalLVal
= CE
.getReferenceLValue(*this, &DRE
);
871 assert(!CE
&& "Expected non-constant firstprivate.");
872 OriginalLVal
= EmitLValue(&DRE
);
875 OriginalLVal
= EmitLValue(&DRE
);
877 QualType Type
= VD
->getType();
878 if (Type
->isArrayType()) {
879 // Emit VarDecl with copy init for arrays.
880 // Get the address of the original variable captured in current
882 AutoVarEmission Emission
= EmitAutoVarAlloca(*VD
);
883 const Expr
*Init
= VD
->getInit();
884 if (!isa
<CXXConstructExpr
>(Init
) || isTrivialInitializer(Init
)) {
885 // Perform simple memcpy.
886 LValue Dest
= MakeAddrLValue(Emission
.getAllocatedAddress(), Type
);
887 EmitAggregateAssign(Dest
, OriginalLVal
, Type
);
889 EmitOMPAggregateAssign(
890 Emission
.getAllocatedAddress(), OriginalLVal
.getAddress(*this),
892 [this, VDInit
, Init
](Address DestElement
, Address SrcElement
) {
893 // Clean up any temporaries needed by the
895 RunCleanupsScope
InitScope(*this);
896 // Emit initialization for single element.
897 setAddrOfLocalVar(VDInit
, SrcElement
);
898 EmitAnyExprToMem(Init
, DestElement
,
899 Init
->getType().getQualifiers(),
900 /*IsInitializer*/ false);
901 LocalDeclMap
.erase(VDInit
);
904 EmitAutoVarCleanups(Emission
);
906 PrivateScope
.addPrivate(OrigVD
, Emission
.getAllocatedAddress());
908 Address OriginalAddr
= OriginalLVal
.getAddress(*this);
909 // Emit private VarDecl with copy init.
910 // Remap temp VDInit variable to the address of the original
911 // variable (for proper handling of captured global variables).
912 setAddrOfLocalVar(VDInit
, OriginalAddr
);
914 LocalDeclMap
.erase(VDInit
);
915 Address VDAddr
= GetAddrOfLocalVar(VD
);
916 if (ThisFirstprivateIsLastprivate
&&
917 Lastprivates
[OrigVD
->getCanonicalDecl()] ==
918 OMPC_LASTPRIVATE_conditional
) {
919 // Create/init special variable for lastprivate conditionals.
921 EmitLoadOfScalar(MakeAddrLValue(VDAddr
, (*IRef
)->getType(),
922 AlignmentSource::Decl
),
923 (*IRef
)->getExprLoc());
924 VDAddr
= CGM
.getOpenMPRuntime().emitLastprivateConditionalInit(
926 EmitStoreOfScalar(V
, MakeAddrLValue(VDAddr
, (*IRef
)->getType(),
927 AlignmentSource::Decl
));
// Rebind the private declaration to the runtime-provided address.
928 LocalDeclMap
.erase(VD
);
929 setAddrOfLocalVar(VD
, VDAddr
);
931 IsRegistered
= PrivateScope
.addPrivate(OrigVD
, VDAddr
);
933 assert(IsRegistered
&&
934 "firstprivate var already registered as private");
935 // Silence the warning about unused variable.
942 return FirstprivateIsLastprivate
&& !EmittedAsFirstprivate
.empty();
/// Registers, in \p PrivateScope, a private copy for every variable listed in
/// the 'private' clauses of \p D.
///
/// Each original variable is processed once, keyed by its canonical
/// declaration; the original is mapped to the local address of the private
/// copy taken from the clause's private_copies() list.
///
/// \param D Directive whose 'private' clauses are read.
/// \param PrivateScope Scope receiving the original -> private address mapping.
///
/// NOTE(review): the embedded listing numbers skip in places (e.g. 949, 957,
/// 959), so some statements of this function are not visible in this excerpt.
945 void CodeGenFunction::EmitOMPPrivateClause(
946 const OMPExecutableDirective
&D
,
947 CodeGenFunction::OMPPrivateScope
&PrivateScope
) {
948 if (!HaveInsertPoint())
// Canonical declarations that already received a private copy.
950 llvm::DenseSet
<const VarDecl
*> EmittedAsPrivate
;
951 for (const auto *C
: D
.getClausesOfKind
<OMPPrivateClause
>()) {
952 auto IRef
= C
->varlist_begin();
953 for (const Expr
*IInit
: C
->private_copies()) {
954 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
955 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
956 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IInit
)->getDecl());
958 // Emit private VarDecl with copy init.
960 PrivateScope
.addPrivate(OrigVD
, GetAddrOfLocalVar(VD
));
961 assert(IsRegistered
&& "private var already registered as private");
962 // Silence the warning about unused variable.
/// Emits the copies required by the 'copyin' clauses of \p D: the master
/// thread's value of each listed variable is copied into the current thread's
/// threadprivate instance via EmitOMPCopy.
///
/// The master address comes from the captured declaration when TLS is enabled
/// and supported by the target; otherwise it is the address of the static
/// local or global itself. When the first variable is processed, a runtime
/// comparison of the master and private addresses is emitted so that the
/// copies live in the "copyin.not.master" block and are bypassed when both
/// addresses are equal.
///
/// \param D Directive whose 'copyin' clauses are read.
///
/// NOTE(review): the return statements are not visible in this excerpt (the
/// embedded listing numbers skip, e.g. 972 and 1037-1040), so the exact meaning
/// of the bool result cannot be confirmed from here.
970 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective
&D
) {
971 if (!HaveInsertPoint())
973 // threadprivate_var1 = master_threadprivate_var1;
974 // operator=(threadprivate_var2, master_threadprivate_var2);
976 // __kmpc_barrier(&loc, global_tid);
// Canonical declarations that were already copied.
977 llvm::DenseSet
<const VarDecl
*> CopiedVars
;
978 llvm::BasicBlock
*CopyBegin
= nullptr, *CopyEnd
= nullptr;
979 for (const auto *C
: D
.getClausesOfKind
<OMPCopyinClause
>()) {
980 auto IRef
= C
->varlist_begin();
981 auto ISrcRef
= C
->source_exprs().begin();
982 auto IDestRef
= C
->destination_exprs().begin();
983 for (const Expr
*AssignOp
: C
->assignment_ops()) {
984 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
985 QualType Type
= VD
->getType();
986 if (CopiedVars
.insert(VD
->getCanonicalDecl()).second
) {
987 // Get the address of the master variable. If we are emitting code with
988 // TLS support, the address is passed from the master as field in the
989 // captured declaration.
990 Address MasterAddr
= Address::invalid();
991 if (getLangOpts().OpenMPUseTLS
&&
992 getContext().getTargetInfo().isTLSSupported()) {
993 assert(CapturedStmtInfo
->lookup(VD
) &&
994 "Copyin threadprivates should have been captured!");
995 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(VD
), true,
996 (*IRef
)->getType(), VK_LValue
, (*IRef
)->getExprLoc());
997 MasterAddr
= EmitLValue(&DRE
).getAddress(*this);
998 LocalDeclMap
.erase(VD
);
1001 Address(VD
->isStaticLocal() ? CGM
.getStaticLocalDeclAddress(VD
)
1002 : CGM
.GetAddrOfGlobal(VD
),
1003 CGM
.getTypes().ConvertTypeForMem(VD
->getType()),
1004 getContext().getDeclAlign(VD
));
1006 // Get the address of the threadprivate variable.
1007 Address PrivateAddr
= EmitLValue(*IRef
).getAddress(*this);
// Only the first copied variable emits the master/non-master check.
1008 if (CopiedVars
.size() == 1) {
1009 // At first check if current thread is a master thread. If it is, no
1010 // need to copy data.
1011 CopyBegin
= createBasicBlock("copyin.not.master");
1012 CopyEnd
= createBasicBlock("copyin.not.master.end");
1013 // TODO: Avoid ptrtoint conversion.
1014 auto *MasterAddrInt
=
1015 Builder
.CreatePtrToInt(MasterAddr
.getPointer(), CGM
.IntPtrTy
);
1016 auto *PrivateAddrInt
=
1017 Builder
.CreatePtrToInt(PrivateAddr
.getPointer(), CGM
.IntPtrTy
);
1018 Builder
.CreateCondBr(
1019 Builder
.CreateICmpNE(MasterAddrInt
, PrivateAddrInt
), CopyBegin
,
1021 EmitBlock(CopyBegin
);
1024 cast
<VarDecl
>(cast
<DeclRefExpr
>(*ISrcRef
)->getDecl());
1025 const auto *DestVD
=
1026 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IDestRef
)->getDecl());
1027 EmitOMPCopy(Type
, PrivateAddr
, MasterAddr
, DestVD
, SrcVD
, AssignOp
);
1035 // Exit out of copying procedure for non-master thread.
1036 EmitBlock(CopyEnd
, /*IsFinished=*/true);
/// Prepares the private copies for the variables named in the 'lastprivate'
/// clauses of \p D.
///
/// For each distinct original variable (by canonical declaration) the clause's
/// destination pseudo-variable is mapped in \p PrivateScope to the address of
/// the original variable. If the clause supplies a private-copy initializer
/// and the variable is not a loop counter of a simd directive, the original is
/// additionally mapped to its private copy; for the 'conditional' kind the
/// address is obtained from the OpenMP runtime.
///
/// \param D Directive whose 'lastprivate' clauses are read.
/// \param PrivateScope Scope receiving the address mappings.
/// \returns true if \p D has at least one 'lastprivate' clause
/// (HasAtLeastOneLastprivate).
///
/// NOTE(review): the embedded listing numbers skip in places (e.g. 1045, 1060,
/// 1085, 1089), so some statements of this function are not visible in this
/// excerpt.
1042 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1043 const OMPExecutableDirective
&D
, OMPPrivateScope
&PrivateScope
) {
1044 if (!HaveInsertPoint())
1046 bool HasAtLeastOneLastprivate
= false;
// Canonical declarations of the loop counters; only filled for simd
// directives.
1047 llvm::DenseSet
<const VarDecl
*> SIMDLCVs
;
1048 if (isOpenMPSimdDirective(D
.getDirectiveKind())) {
1049 const auto *LoopDirective
= cast
<OMPLoopDirective
>(&D
);
1050 for (const Expr
*C
: LoopDirective
->counters()) {
1052 cast
<VarDecl
>(cast
<DeclRefExpr
>(C
)->getDecl())->getCanonicalDecl());
1055 llvm::DenseSet
<const VarDecl
*> AlreadyEmittedVars
;
1056 for (const auto *C
: D
.getClausesOfKind
<OMPLastprivateClause
>()) {
1057 HasAtLeastOneLastprivate
= true;
1058 if (isOpenMPTaskLoopDirective(D
.getDirectiveKind()) &&
1059 !getLangOpts().OpenMPSimd
)
1061 const auto *IRef
= C
->varlist_begin();
1062 const auto *IDestRef
= C
->destination_exprs().begin();
1063 for (const Expr
*IInit
: C
->private_copies()) {
1064 // Keep the address of the original variable for future update at the end
1066 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
1067 // Taskloops do not require additional initialization, it is done in
1068 // runtime support library.
1069 if (AlreadyEmittedVars
.insert(OrigVD
->getCanonicalDecl()).second
) {
1070 const auto *DestVD
=
1071 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IDestRef
)->getDecl());
1072 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
1073 /*RefersToEnclosingVariableOrCapture=*/
1074 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
1075 (*IRef
)->getType(), VK_LValue
, (*IRef
)->getExprLoc());
// The destination pseudo-variable refers to the original variable.
1076 PrivateScope
.addPrivate(DestVD
, EmitLValue(&DRE
).getAddress(*this));
1077 // Check if the variable is also a firstprivate: in this case IInit is
1078 // not generated. Initialization of this variable will happen in codegen
1079 // for 'firstprivate' clause.
1080 if (IInit
&& !SIMDLCVs
.count(OrigVD
->getCanonicalDecl())) {
1081 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IInit
)->getDecl());
1082 Address VDAddr
= Address::invalid();
1083 if (C
->getKind() == OMPC_LASTPRIVATE_conditional
) {
1084 VDAddr
= CGM
.getOpenMPRuntime().emitLastprivateConditionalInit(
1086 setAddrOfLocalVar(VD
, VDAddr
);
1088 // Emit private VarDecl with copy init.
1090 VDAddr
= GetAddrOfLocalVar(VD
);
1092 bool IsRegistered
= PrivateScope
.addPrivate(OrigVD
, VDAddr
);
1093 assert(IsRegistered
&&
1094 "lastprivate var already registered as private");
1102 return HasAtLeastOneLastprivate
;
/// Emits the copy-back of 'lastprivate' variables of \p D from their private
/// copies to the original variables.
///
/// When \p IsLastIterCond is non-null, a conditional branch on it is emitted
/// towards the ".omp.lastprivate.then" / ".omp.lastprivate.done" blocks; before
/// that, if simd-only mode is off and at least one clause has the
/// 'conditional' kind, a barrier call is emitted. For loop directives the
/// counters are associated with their final-value expressions so that a
/// lastprivate loop counter is updated before it is copied. Each clause's
/// post-update expression, if any, is emitted as well.
///
/// \param D Directive whose 'lastprivate' clauses are read.
/// \param NoFinals Not referenced in the visible lines. NOTE(review):
/// presumably controls whether loop-counter finals are emitted - confirm.
/// \param IsLastIterCond Optional condition guarding the copy-back; may be
/// null.
///
/// NOTE(review): the embedded listing numbers skip in places (e.g. 1109,
/// 1143-1148), so some statements of this function are not visible in this
/// excerpt.
1105 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1106 const OMPExecutableDirective
&D
, bool NoFinals
,
1107 llvm::Value
*IsLastIterCond
) {
1108 if (!HaveInsertPoint())
1110 // Emit following code:
1111 // if (<IsLastIterCond>) {
1112 // orig_var1 = private_orig_var1;
1114 // orig_varn = private_orig_varn;
1116 llvm::BasicBlock
*ThenBB
= nullptr;
1117 llvm::BasicBlock
*DoneBB
= nullptr;
1118 if (IsLastIterCond
) {
1119 // Emit implicit barrier if at least one lastprivate conditional is found
1120 // and this is not a simd mode.
1121 if (!getLangOpts().OpenMPSimd
&&
1122 llvm::any_of(D
.getClausesOfKind
<OMPLastprivateClause
>(),
1123 [](const OMPLastprivateClause
*C
) {
1124 return C
->getKind() == OMPC_LASTPRIVATE_conditional
;
1126 CGM
.getOpenMPRuntime().emitBarrierCall(*this, D
.getBeginLoc(),
1128 /*EmitChecks=*/false,
1129 /*ForceSimpleCall=*/true);
1131 ThenBB
= createBasicBlock(".omp.lastprivate.then");
1132 DoneBB
= createBasicBlock(".omp.lastprivate.done");
1133 Builder
.CreateCondBr(IsLastIterCond
, ThenBB
, DoneBB
);
1136 llvm::DenseSet
<const VarDecl
*> AlreadyEmittedVars
;
// Maps a loop counter (canonical declaration) to its final-value expression.
1137 llvm::DenseMap
<const VarDecl
*, const Expr
*> LoopCountersAndUpdates
;
1138 if (const auto *LoopDirective
= dyn_cast
<OMPLoopDirective
>(&D
)) {
1139 auto IC
= LoopDirective
->counters().begin();
1140 for (const Expr
*F
: LoopDirective
->finals()) {
1142 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IC
)->getDecl())->getCanonicalDecl();
1144 AlreadyEmittedVars
.insert(D
);
1146 LoopCountersAndUpdates
[D
] = F
;
1150 for (const auto *C
: D
.getClausesOfKind
<OMPLastprivateClause
>()) {
1151 auto IRef
= C
->varlist_begin();
1152 auto ISrcRef
= C
->source_exprs().begin();
1153 auto IDestRef
= C
->destination_exprs().begin();
1154 for (const Expr
*AssignOp
: C
->assignment_ops()) {
1155 const auto *PrivateVD
=
1156 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
1157 QualType Type
= PrivateVD
->getType();
1158 const auto *CanonicalVD
= PrivateVD
->getCanonicalDecl();
1159 if (AlreadyEmittedVars
.insert(CanonicalVD
).second
) {
1160 // If lastprivate variable is a loop control variable for loop-based
1161 // directive, update its value before copying it back to the original variable.
1163 if (const Expr
*FinalExpr
= LoopCountersAndUpdates
.lookup(CanonicalVD
))
1164 EmitIgnoredExpr(FinalExpr
);
1166 cast
<VarDecl
>(cast
<DeclRefExpr
>(*ISrcRef
)->getDecl());
1167 const auto *DestVD
=
1168 cast
<VarDecl
>(cast
<DeclRefExpr
>(*IDestRef
)->getDecl());
1169 // Get the address of the private variable.
1170 Address PrivateAddr
= GetAddrOfLocalVar(PrivateVD
);
// For reference-typed variables, load the stored pointer and use the
// pointee as the private address.
1171 if (const auto *RefTy
= PrivateVD
->getType()->getAs
<ReferenceType
>())
1172 PrivateAddr
= Address(
1173 Builder
.CreateLoad(PrivateAddr
),
1174 CGM
.getTypes().ConvertTypeForMem(RefTy
->getPointeeType()),
1175 CGM
.getNaturalTypeAlignment(RefTy
->getPointeeType()));
1176 // Store the last value to the private copy in the last iteration.
1177 if (C
->getKind() == OMPC_LASTPRIVATE_conditional
)
1178 CGM
.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1179 *this, MakeAddrLValue(PrivateAddr
, (*IRef
)->getType()), PrivateVD
,
1180 (*IRef
)->getExprLoc());
1181 // Get the address of the original variable.
1182 Address OriginalAddr
= GetAddrOfLocalVar(DestVD
);
1183 EmitOMPCopy(Type
, OriginalAddr
, PrivateAddr
, DestVD
, SrcVD
, AssignOp
);
1189 if (const Expr
*PostUpdate
= C
->getPostUpdateExpr())
1190 EmitIgnoredExpr(PostUpdate
);
1193 EmitBlock(DoneBB
, /*IsFinished=*/true);
/// Emits the private copies of the reduction variables of \p D and registers
/// them, together with the LHS/RHS helper variables of each reduction, in
/// \p PrivateScope.
///
/// Only clauses whose "is inscan" status equals \p ForInscan are processed.
/// For every reduction item the shared lvalue and aggregate type are emitted,
/// the private copy is allocated and initialized through ReductionCodeGen, and
/// the helper variables are mapped depending on whether the item is an array
/// section, an array subscript or a plain variable. If any clause has the
/// 'task' modifier, a task reduction descriptor is created by the OpenMP
/// runtime and stored into the directive's task-reduction reference variable.
///
/// \param D Directive whose 'reduction' clauses are read.
/// \param PrivateScope Scope receiving the address mappings.
/// \param ForInscan Selects inscan (true) or non-inscan (false) clauses.
///
/// NOTE(review): the embedded listing numbers skip in places (e.g. 1200, 1206,
/// 1211, 1228, and most case labels of the switch), so some statements of this
/// function, including the declarations of 'Data' and 'Count', are not visible
/// in this excerpt.
1196 void CodeGenFunction::EmitOMPReductionClauseInit(
1197 const OMPExecutableDirective
&D
,
1198 CodeGenFunction::OMPPrivateScope
&PrivateScope
, bool ForInscan
) {
1199 if (!HaveInsertPoint())
1201 SmallVector
<const Expr
*, 4> Shareds
;
1202 SmallVector
<const Expr
*, 4> Privates
;
1203 SmallVector
<const Expr
*, 4> ReductionOps
;
1204 SmallVector
<const Expr
*, 4> LHSs
;
1205 SmallVector
<const Expr
*, 4> RHSs
;
1207 SmallVector
<const Expr
*, 4> TaskLHSs
;
1208 SmallVector
<const Expr
*, 4> TaskRHSs
;
1209 for (const auto *C
: D
.getClausesOfKind
<OMPReductionClause
>()) {
// Filter on the inscan modifier so that only the requested kind is handled.
1210 if (ForInscan
!= (C
->getModifier() == OMPC_REDUCTION_inscan
))
1212 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
1213 Privates
.append(C
->privates().begin(), C
->privates().end());
1214 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
1215 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
1216 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
// Clauses with the 'task' modifier additionally feed the task reduction data.
1217 if (C
->getModifier() == OMPC_REDUCTION_task
) {
1218 Data
.ReductionVars
.append(C
->privates().begin(), C
->privates().end());
1219 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
1220 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
1221 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
1222 C
->reduction_ops().end());
1223 TaskLHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
1224 TaskRHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
1227 ReductionCodeGen
RedCG(Shareds
, Shareds
, Privates
, ReductionOps
);
1229 auto *ILHS
= LHSs
.begin();
1230 auto *IRHS
= RHSs
.begin();
1231 auto *IPriv
= Privates
.begin();
1232 for (const Expr
*IRef
: Shareds
) {
1233 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IPriv
)->getDecl());
1234 // Emit private VarDecl with reduction init.
1235 RedCG
.emitSharedOrigLValue(*this, Count
);
1236 RedCG
.emitAggregateType(*this, Count
);
1237 AutoVarEmission Emission
= EmitAutoVarAlloca(*PrivateVD
);
1238 RedCG
.emitInitialization(*this, Count
, Emission
.getAllocatedAddress(),
1239 RedCG
.getSharedLValue(Count
).getAddress(*this),
1240 [&Emission
](CodeGenFunction
&CGF
) {
1241 CGF
.EmitAutoVarInit(Emission
);
1244 EmitAutoVarCleanups(Emission
);
1245 Address BaseAddr
= RedCG
.adjustPrivateAddress(
1246 *this, Count
, Emission
.getAllocatedAddress());
1248 PrivateScope
.addPrivate(RedCG
.getBaseDecl(Count
), BaseAddr
);
1249 assert(IsRegistered
&& "private var already registered as private");
1250 // Silence the warning about unused variable.
1253 const auto *LHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*ILHS
)->getDecl());
1254 const auto *RHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRHS
)->getDecl());
1255 QualType Type
= PrivateVD
->getType();
1256 bool isaOMPArraySectionExpr
= isa
<OMPArraySectionExpr
>(IRef
);
1257 if (isaOMPArraySectionExpr
&& Type
->isVariablyModifiedType()) {
1258 // Store the address of the original variable associated with the LHS
1259 // implicit variable.
1260 PrivateScope
.addPrivate(LHSVD
,
1261 RedCG
.getSharedLValue(Count
).getAddress(*this));
1262 PrivateScope
.addPrivate(RHSVD
, GetAddrOfLocalVar(PrivateVD
));
1263 } else if ((isaOMPArraySectionExpr
&& Type
->isScalarType()) ||
1264 isa
<ArraySubscriptExpr
>(IRef
)) {
1265 // Store the address of the original variable associated with the LHS
1266 // implicit variable.
1267 PrivateScope
.addPrivate(LHSVD
,
1268 RedCG
.getSharedLValue(Count
).getAddress(*this));
1269 PrivateScope
.addPrivate(RHSVD
, Builder
.CreateElementBitCast(
1270 GetAddrOfLocalVar(PrivateVD
),
1271 ConvertTypeForMem(RHSVD
->getType()),
1274 QualType Type
= PrivateVD
->getType();
1275 bool IsArray
= getContext().getAsArrayType(Type
) != nullptr;
1276 Address OriginalAddr
= RedCG
.getSharedLValue(Count
).getAddress(*this);
1277 // Store the address of the original variable associated with the LHS
1278 // implicit variable.
1280 OriginalAddr
= Builder
.CreateElementBitCast(
1281 OriginalAddr
, ConvertTypeForMem(LHSVD
->getType()), "lhs.begin");
1283 PrivateScope
.addPrivate(LHSVD
, OriginalAddr
);
1284 PrivateScope
.addPrivate(
1285 RHSVD
, IsArray
? Builder
.CreateElementBitCast(
1286 GetAddrOfLocalVar(PrivateVD
),
1287 ConvertTypeForMem(RHSVD
->getType()), "rhs.begin")
1288 : GetAddrOfLocalVar(PrivateVD
));
// Task-modifier reductions: initialize the runtime descriptor and store it
// into the directive-specific task-reduction reference variable.
1295 if (!Data
.ReductionVars
.empty()) {
1296 Data
.IsReductionWithTaskMod
= true;
1297 Data
.IsWorksharingReduction
=
1298 isOpenMPWorksharingDirective(D
.getDirectiveKind());
1299 llvm::Value
*ReductionDesc
= CGM
.getOpenMPRuntime().emitTaskReductionInit(
1300 *this, D
.getBeginLoc(), TaskLHSs
, TaskRHSs
, Data
);
1301 const Expr
*TaskRedRef
= nullptr;
1302 switch (D
.getDirectiveKind()) {
1304 TaskRedRef
= cast
<OMPParallelDirective
>(D
).getTaskReductionRefExpr();
1307 TaskRedRef
= cast
<OMPForDirective
>(D
).getTaskReductionRefExpr();
1310 TaskRedRef
= cast
<OMPSectionsDirective
>(D
).getTaskReductionRefExpr();
1312 case OMPD_parallel_for
:
1313 TaskRedRef
= cast
<OMPParallelForDirective
>(D
).getTaskReductionRefExpr();
1315 case OMPD_parallel_master
:
1317 cast
<OMPParallelMasterDirective
>(D
).getTaskReductionRefExpr();
1319 case OMPD_parallel_sections
:
1321 cast
<OMPParallelSectionsDirective
>(D
).getTaskReductionRefExpr();
1323 case OMPD_target_parallel
:
1325 cast
<OMPTargetParallelDirective
>(D
).getTaskReductionRefExpr();
1327 case OMPD_target_parallel_for
:
1329 cast
<OMPTargetParallelForDirective
>(D
).getTaskReductionRefExpr();
1331 case OMPD_distribute_parallel_for
:
1333 cast
<OMPDistributeParallelForDirective
>(D
).getTaskReductionRefExpr();
1335 case OMPD_teams_distribute_parallel_for
:
1336 TaskRedRef
= cast
<OMPTeamsDistributeParallelForDirective
>(D
)
1337 .getTaskReductionRefExpr();
1339 case OMPD_target_teams_distribute_parallel_for
:
1340 TaskRedRef
= cast
<OMPTargetTeamsDistributeParallelForDirective
>(D
)
1341 .getTaskReductionRefExpr();
1349 case OMPD_parallel_for_simd
:
1351 case OMPD_taskyield
:
1355 case OMPD_taskgroup
:
1363 case OMPD_cancellation_point
:
1365 case OMPD_target_data
:
1366 case OMPD_target_enter_data
:
1367 case OMPD_target_exit_data
:
1369 case OMPD_taskloop_simd
:
1370 case OMPD_master_taskloop
:
1371 case OMPD_master_taskloop_simd
:
1372 case OMPD_parallel_master_taskloop
:
1373 case OMPD_parallel_master_taskloop_simd
:
1374 case OMPD_distribute
:
1375 case OMPD_target_update
:
1376 case OMPD_distribute_parallel_for_simd
:
1377 case OMPD_distribute_simd
:
1378 case OMPD_target_parallel_for_simd
:
1379 case OMPD_target_simd
:
1380 case OMPD_teams_distribute
:
1381 case OMPD_teams_distribute_simd
:
1382 case OMPD_teams_distribute_parallel_for_simd
:
1383 case OMPD_target_teams
:
1384 case OMPD_target_teams_distribute
:
1385 case OMPD_target_teams_distribute_parallel_for_simd
:
1386 case OMPD_target_teams_distribute_simd
:
1387 case OMPD_declare_target
:
1388 case OMPD_end_declare_target
:
1389 case OMPD_threadprivate
:
1391 case OMPD_declare_reduction
:
1392 case OMPD_declare_mapper
:
1393 case OMPD_declare_simd
:
1395 case OMPD_declare_variant
:
1396 case OMPD_begin_declare_variant
:
1397 case OMPD_end_declare_variant
:
1400 llvm_unreachable("Enexpected directive with task reductions.");
1403 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(TaskRedRef
)->getDecl());
1405 EmitStoreOfScalar(ReductionDesc
, GetAddrOfLocalVar(VD
),
1406 /*Volatile=*/false, TaskRedRef
->getType());
/// Emits the final reduction step for the non-inscan 'reduction' clauses of
/// \p D.
///
/// The privates, LHS/RHS expressions and reduction operations of all such
/// clauses are collected. If at least one clause exists, the task reduction
/// finalization is emitted first when any clause has the 'task' modifier, and
/// then the OpenMP runtime's emitReduction is called with the options
/// {WithNowait, SimpleReduction, ReductionKind}.
///
/// \param D Directive whose 'reduction' clauses are read.
/// \param ReductionKind Directive kind passed on to the runtime; the value
/// OMPD_simd selects a simple, nowait reduction.
///
/// NOTE(review): the embedded listing numbers skip in places (e.g. 1413,
/// 1423), so some statements of this function are not visible in this excerpt.
1410 void CodeGenFunction::EmitOMPReductionClauseFinal(
1411 const OMPExecutableDirective
&D
, const OpenMPDirectiveKind ReductionKind
) {
1412 if (!HaveInsertPoint())
1414 llvm::SmallVector
<const Expr
*, 8> Privates
;
1415 llvm::SmallVector
<const Expr
*, 8> LHSExprs
;
1416 llvm::SmallVector
<const Expr
*, 8> RHSExprs
;
1417 llvm::SmallVector
<const Expr
*, 8> ReductionOps
;
1418 bool HasAtLeastOneReduction
= false;
1419 bool IsReductionWithTaskMod
= false;
1420 for (const auto *C
: D
.getClausesOfKind
<OMPReductionClause
>()) {
1421 // Do not emit for inscan reductions.
1422 if (C
->getModifier() == OMPC_REDUCTION_inscan
)
1424 HasAtLeastOneReduction
= true;
1425 Privates
.append(C
->privates().begin(), C
->privates().end());
1426 LHSExprs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
1427 RHSExprs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
1428 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
1429 IsReductionWithTaskMod
=
1430 IsReductionWithTaskMod
|| C
->getModifier() == OMPC_REDUCTION_task
;
1432 if (HasAtLeastOneReduction
) {
1433 if (IsReductionWithTaskMod
) {
1434 CGM
.getOpenMPRuntime().emitTaskReductionFini(
1435 *this, D
.getBeginLoc(),
1436 isOpenMPWorksharingDirective(D
.getDirectiveKind()));
// Nowait applies with an explicit 'nowait' clause, on parallel directives,
// and for simd reductions.
1438 bool WithNowait
= D
.getSingleClause
<OMPNowaitClause
>() ||
1439 isOpenMPParallelDirective(D
.getDirectiveKind()) ||
1440 ReductionKind
== OMPD_simd
;
1441 bool SimpleReduction
= ReductionKind
== OMPD_simd
;
1442 // Emit nowait reduction if nowait clause is present or directive is a
1443 // parallel directive (it always has implicit barrier).
1444 CGM
.getOpenMPRuntime().emitReduction(
1445 *this, D
.getEndLoc(), Privates
, LHSExprs
, RHSExprs
, ReductionOps
,
1446 {WithNowait
, SimpleReduction
, ReductionKind
});
/// Emits the post-update expressions attached to the 'reduction' clauses of
/// \p D.
///
/// \p CondGen is invoked to obtain an optional condition; when it yields a
/// non-null value, a conditional branch to the ".omp.reduction.pu" block is
/// emitted, with ".omp.reduction.pu.done" as the continuation block.
///
/// \param CGF Function being generated.
/// \param D Directive whose 'reduction' clauses are read.
/// \param CondGen Callback producing the guarding condition, or null for none.
///
/// NOTE(review): the embedded listing numbers skip in places (e.g. 1454, 1458,
/// 1466-1467), so some statements of this function are not visible in this
/// excerpt.
1450 static void emitPostUpdateForReductionClause(
1451 CodeGenFunction
&CGF
, const OMPExecutableDirective
&D
,
1452 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
1453 if (!CGF
.HaveInsertPoint())
1455 llvm::BasicBlock
*DoneBB
= nullptr;
1456 for (const auto *C
: D
.getClausesOfKind
<OMPReductionClause
>()) {
1457 if (const Expr
*PostUpdate
= C
->getPostUpdateExpr()) {
1459 if (llvm::Value
*Cond
= CondGen(CGF
)) {
1460 // If the first post-update expression is found, emit conditional
1461 // block if it was requested.
1462 llvm::BasicBlock
*ThenBB
= CGF
.createBasicBlock(".omp.reduction.pu");
1463 DoneBB
= CGF
.createBasicBlock(".omp.reduction.pu.done");
1464 CGF
.Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
1465 CGF
.EmitBlock(ThenBB
);
1468 CGF
.EmitIgnoredExpr(PostUpdate
);
1472 CGF
.EmitBlock(DoneBB
, /*IsFinished=*/true);
1476 /// Callback type used to append the 'distribute' lower and upper bounds to the
1477 /// list of values passed to the outlined parallel function. This is necessary
1478 /// for combined constructs such as 'distribute parallel for'.
1479 typedef llvm::function_ref
<void(CodeGenFunction
&,
1480 const OMPExecutableDirective
&,
1481 llvm::SmallVectorImpl
<llvm::Value
*> &)>
1482 CodeGenBoundParametersTy
;
1483 } // anonymous namespace
/// Checks the data-sharing clauses of \p S for updates of lastprivate
/// conditional variables.
///
/// The OpenMP language version is compared against 50 first. Variables from
/// 'reduction', 'lastprivate' and 'linear' clauses are recorded in
/// PrivateDecls and passed to checkAndEmitLastprivateConditional; variables
/// from 'firstprivate' clauses are only recorded. Finally the runtime's
/// checkAndEmitSharedLastprivateConditional is called with the collected set.
///
/// \param CGF Function being generated.
/// \param S Directive whose clauses are inspected.
///
/// NOTE(review): the line carrying the return type/storage class (listing
/// number 1485) and the statements following each 'if' (e.g. 1489, 1494,
/// 1496-1497) are not visible in this excerpt.
1486 checkForLastprivateConditionalUpdate(CodeGenFunction
&CGF
,
1487 const OMPExecutableDirective
&S
) {
1488 if (CGF
.getLangOpts().OpenMP
< 50)
1490 llvm::DenseSet
<CanonicalDeclPtr
<const VarDecl
>> PrivateDecls
;
1491 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
1492 for (const Expr
*Ref
: C
->varlists()) {
1493 if (!Ref
->getType()->isScalarType())
1495 const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
1498 PrivateDecls
.insert(cast
<VarDecl
>(DRE
->getDecl()));
1499 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, Ref
);
1502 for (const auto *C
: S
.getClausesOfKind
<OMPLastprivateClause
>()) {
1503 for (const Expr
*Ref
: C
->varlists()) {
1504 if (!Ref
->getType()->isScalarType())
1506 const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
1509 PrivateDecls
.insert(cast
<VarDecl
>(DRE
->getDecl()));
1510 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, Ref
);
1513 for (const auto *C
: S
.getClausesOfKind
<OMPLinearClause
>()) {
1514 for (const Expr
*Ref
: C
->varlists()) {
1515 if (!Ref
->getType()->isScalarType())
1517 const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
1520 PrivateDecls
.insert(cast
<VarDecl
>(DRE
->getDecl()));
1521 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, Ref
);
1524 // Privates need not be analyzed since they are not captured at all.
1525 // Task reductions may be skipped - tasks are ignored.
1526 // Firstprivates do not return value but may be passed by reference - no need
1527 // to check for updated lastprivate conditional.
1528 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
1529 for (const Expr
*Ref
: C
->varlists()) {
1530 if (!Ref
->getType()->isScalarType())
1532 const auto *DRE
= dyn_cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
1535 PrivateDecls
.insert(cast
<VarDecl
>(DRE
->getDecl()));
1538 CGF
.CGM
.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1539 CGF
, S
, PrivateDecls
);
/// Shared lowering for directives that contain a 'parallel' region.
///
/// Outlines the region captured for OMPD_parallel through the OpenMP runtime,
/// emits the 'num_threads' and 'proc_bind' clauses when present, selects the
/// condition of an 'if' clause whose name modifier is either absent
/// (OMPD_unknown) or 'parallel', collects the captured variables (preceded by
/// whatever \p CodeGenBoundParameters appends) and emits the parallel call.
///
/// \param CGF Function being generated.
/// \param S Directive being lowered.
/// \param InnermostKind Directive kind forwarded to the outlining routine.
/// \param CodeGen Region body generator. NOTE(review): its use is not visible
/// in this excerpt (listing number 1551 is skipped) - presumably passed to
/// emitParallelOutlinedFunction; confirm.
/// \param CodeGenBoundParameters Callback that may append bound parameters to
/// the captured-variable list.
1542 static void emitCommonOMPParallelDirective(
1543 CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
,
1544 OpenMPDirectiveKind InnermostKind
, const RegionCodeGenTy
&CodeGen
,
1545 const CodeGenBoundParametersTy
&CodeGenBoundParameters
) {
1546 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_parallel
);
1547 llvm::Value
*NumThreads
= nullptr;
1548 llvm::Function
*OutlinedFn
=
1549 CGF
.CGM
.getOpenMPRuntime().emitParallelOutlinedFunction(
1550 CGF
, S
, *CS
->getCapturedDecl()->param_begin(), InnermostKind
,
// Evaluate 'num_threads' inside its own cleanup scope and hand it to the
// runtime.
1552 if (const auto *NumThreadsClause
= S
.getSingleClause
<OMPNumThreadsClause
>()) {
1553 CodeGenFunction::RunCleanupsScope
NumThreadsScope(CGF
);
1554 NumThreads
= CGF
.EmitScalarExpr(NumThreadsClause
->getNumThreads(),
1555 /*IgnoreResultAssign=*/true);
1556 CGF
.CGM
.getOpenMPRuntime().emitNumThreadsClause(
1557 CGF
, NumThreads
, NumThreadsClause
->getBeginLoc());
1559 if (const auto *ProcBindClause
= S
.getSingleClause
<OMPProcBindClause
>()) {
1560 CodeGenFunction::RunCleanupsScope
ProcBindScope(CGF
);
1561 CGF
.CGM
.getOpenMPRuntime().emitProcBindClause(
1562 CGF
, ProcBindClause
->getProcBindKind(), ProcBindClause
->getBeginLoc());
// Pick the 'if' clause that applies to the parallel region.
1564 const Expr
*IfCond
= nullptr;
1565 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
1566 if (C
->getNameModifier() == OMPD_unknown
||
1567 C
->getNameModifier() == OMPD_parallel
) {
1568 IfCond
= C
->getCondition();
1573 OMPParallelScope
Scope(CGF
, S
);
1574 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
1575 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1576 // lower and upper bounds with the pragma 'for' chunking mechanism.
1577 // The following lambda takes care of appending the lower and upper bound
1578 // parameters when necessary
1579 CodeGenBoundParameters(CGF
, S
, CapturedVars
);
1580 CGF
.GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
1581 CGF
.CGM
.getOpenMPRuntime().emitParallelCall(CGF
, S
.getBeginLoc(), OutlinedFn
,
1582 CapturedVars
, IfCond
, NumThreads
);
/// Tells whether \p VD requests non-default allocation through an
/// OMPAllocateDeclAttr on its canonical declaration.
///
/// The visible return yields false when the allocator type is
/// OMPDefaultMemAlloc or OMPNullMemAlloc and no allocator expression is
/// attached, and true otherwise.
///
/// NOTE(review): the statement executed when the attribute is absent (listing
/// number 1588) is not visible in this excerpt.
1585 static bool isAllocatableDecl(const VarDecl
*VD
) {
1586 const VarDecl
*CVD
= VD
->getCanonicalDecl();
1587 if (!CVD
->hasAttr
<OMPAllocateDeclAttr
>())
1589 const auto *AA
= CVD
->getAttr
<OMPAllocateDeclAttr
>();
1590 // Use the default allocation.
1591 return !((AA
->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc
||
1592 AA
->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc
) &&
1593 !AA
->getAllocator());
/// Bound-parameter callback that does nothing: the body is empty and all
/// arguments are unnamed. Its parameter list matches the signature of
/// CodeGenBoundParametersTy.
1596 static void emitEmptyBoundParameters(CodeGenFunction
&,
1597 const OMPExecutableDirective
&,
1598 llvm::SmallVectorImpl
<llvm::Value
*> &) {}
/// Emits the 'copyin' clause handling for \p S via
/// CodeGenFunction::EmitOMPCopyinClause and the barrier call that follows it.
///
/// \param CGF Function being generated.
/// \param S Directive whose 'copyin' clauses are emitted.
///
/// NOTE(review): the statement that uses 'Copyins' (listing number 1603) is not
/// visible in this excerpt; presumably the barrier is emitted only when it is
/// true - confirm.
1600 static void emitOMPCopyinClause(CodeGenFunction
&CGF
,
1601 const OMPExecutableDirective
&S
) {
1602 bool Copyins
= CGF
.EmitOMPCopyinClause(S
);
1604 // Emit an implicit barrier to synchronize threads and avoid data races when
1605 // propagating the master thread's values of threadprivate variables to the
1606 // local instances of those variables in all other implicit threads.
1607 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
1608 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
1609 /*ForceSimpleCall=*/true);
/// Returns the address of storage obtained from an OpenMP allocator for the
/// local variable \p VD, or Address::invalid() when the variable's canonical
/// declaration is not allocatable (see isAllocatableDecl).
///
/// The allocation size is the type size rounded up to the declaration's
/// alignment (computed at run time for variably modified types). The
/// allocator expression of the OMPAllocateDeclAttr is evaluated and converted
/// to a pointer, the OpenMPIRBuilder creates the alloc call and the matching
/// free call, and the free call is registered as a cleanup of kind
/// NormalAndEHCleanup.
///
/// \param CGF Function being generated.
/// \param VD Local variable to allocate.
///
/// NOTE(review): the embedded listing numbers skip in places (e.g. 1617-1618,
/// 1623, 1647-1648, 1657), so the condition guarding the first
/// 'return Address::invalid()' and the declaration of 'Size' are not visible in
/// this excerpt.
1613 Address
CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1614 CodeGenFunction
&CGF
, const VarDecl
*VD
) {
1615 CodeGenModule
&CGM
= CGF
.CGM
;
1616 auto &OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
1619 return Address::invalid();
1620 const VarDecl
*CVD
= VD
->getCanonicalDecl();
1621 if (!isAllocatableDecl(CVD
))
1622 return Address::invalid();
1624 CharUnits Align
= CGM
.getContext().getDeclAlign(CVD
);
1625 if (CVD
->getType()->isVariablyModifiedType()) {
1626 Size
= CGF
.getTypeSize(CVD
->getType());
1627 // Align the size: ((size + align - 1) / align) * align
1628 Size
= CGF
.Builder
.CreateNUWAdd(
1629 Size
, CGM
.getSize(Align
- CharUnits::fromQuantity(1)));
1630 Size
= CGF
.Builder
.CreateUDiv(Size
, CGM
.getSize(Align
));
1631 Size
= CGF
.Builder
.CreateNUWMul(Size
, CGM
.getSize(Align
));
1633 CharUnits Sz
= CGM
.getContext().getTypeSizeInChars(CVD
->getType());
1634 Size
= CGM
.getSize(Sz
.alignTo(Align
));
1637 const auto *AA
= CVD
->getAttr
<OMPAllocateDeclAttr
>();
1638 assert(AA
->getAllocator() &&
1639 "Expected allocator expression for non-default allocator.");
1640 llvm::Value
*Allocator
= CGF
.EmitScalarExpr(AA
->getAllocator());
1641 // According to the standard, the original allocator type is an enum (integer).
1642 // Convert to pointer type, if required.
1643 if (Allocator
->getType()->isIntegerTy())
1644 Allocator
= CGF
.Builder
.CreateIntToPtr(Allocator
, CGM
.VoidPtrTy
);
1645 else if (Allocator
->getType()->isPointerTy())
1646 Allocator
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(Allocator
,
1649 llvm::Value
*Addr
= OMPBuilder
.createOMPAlloc(
1650 CGF
.Builder
, Size
, Allocator
,
1651 getNameWithSeparators({CVD
->getName(), ".void.addr"}, ".", "."));
1652 llvm::CallInst
*FreeCI
=
1653 OMPBuilder
.createOMPFree(CGF
.Builder
, Addr
, Allocator
);
// Register the free call as a cleanup for both normal and EH exits.
1655 CGF
.EHStack
.pushCleanup
<OMPAllocateCleanupTy
>(NormalAndEHCleanup
, FreeCI
);
1656 Addr
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
1658 CGF
.ConvertTypeForMem(CGM
.getContext().getPointerType(CVD
->getType())),
1659 getNameWithSeparators({CVD
->getName(), ".addr"}, ".", "."));
1660 return Address(Addr
, CGF
.ConvertTypeForMem(CVD
->getType()), Align
);
/// Returns the address of the calling thread's instance of the threadprivate
/// variable \p VD.
///
/// The visible non-TLS path asks the OpenMPIRBuilder for a cached
/// threadprivate call, passing the store size of the variable's element type
/// and a cache name built from the mangled name of \p VD plus a "cache"
/// suffix; the call result is returned as an i8-element address with the
/// alignment of \p VDAddr.
///
/// \param CGF Function being generated.
/// \param VD Threadprivate variable.
/// \param VDAddr Address of the original variable.
/// \param Loc Source location. NOTE(review): not referenced in the visible
/// lines.
///
/// NOTE(review): the statement executed when TLS is enabled and supported
/// (listing number 1669) and the declaration of 'Data' (1674) are not visible
/// in this excerpt.
1663 Address
CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1664 CodeGenFunction
&CGF
, const VarDecl
*VD
, Address VDAddr
,
1665 SourceLocation Loc
) {
1666 CodeGenModule
&CGM
= CGF
.CGM
;
1667 if (CGM
.getLangOpts().OpenMPUseTLS
&&
1668 CGM
.getContext().getTargetInfo().isTLSSupported())
1671 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
1673 llvm::Type
*VarTy
= VDAddr
.getElementType();
1675 CGF
.Builder
.CreatePointerCast(VDAddr
.getPointer(), CGM
.Int8PtrTy
);
1676 llvm::ConstantInt
*Size
= CGM
.getSize(CGM
.GetTargetTypeStoreSize(VarTy
));
1677 std::string Suffix
= getNameWithSeparators({"cache", ""});
1678 llvm::Twine CacheName
= Twine(CGM
.getMangledName(VD
)).concat(Suffix
);
1680 llvm::CallInst
*ThreadPrivateCacheCall
=
1681 OMPBuilder
.createCachedThreadPrivate(CGF
.Builder
, Data
, Size
, CacheName
);
1683 return Address(ThreadPrivateCacheCall
, CGM
.Int8Ty
, VDAddr
.getAlignment());
1686 std::string
CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1687 ArrayRef
<StringRef
> Parts
, StringRef FirstSeparator
, StringRef Separator
) {
1688 SmallString
<128> Buffer
;
1689 llvm::raw_svector_ostream
OS(Buffer
);
1690 StringRef Sep
= FirstSeparator
;
1691 for (StringRef Part
: Parts
) {
1695 return OS
.str().str();
1698 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1699 CodeGenFunction
&CGF
, const Stmt
*RegionBodyStmt
, InsertPointTy AllocaIP
,
1700 InsertPointTy CodeGenIP
, Twine RegionName
) {
1701 CGBuilderTy
&Builder
= CGF
.Builder
;
1702 Builder
.restoreIP(CodeGenIP
);
1703 llvm::BasicBlock
*FiniBB
= splitBBWithSuffix(Builder
, /*CreateBranch=*/false,
1704 "." + RegionName
+ ".after");
1707 OMPBuilderCBHelpers::InlinedRegionBodyRAII
IRB(CGF
, AllocaIP
, *FiniBB
);
1708 CGF
.EmitStmt(RegionBodyStmt
);
1711 if (Builder
.saveIP().isSet())
1712 Builder
.CreateBr(FiniBB
);
1715 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1716 CodeGenFunction
&CGF
, const Stmt
*RegionBodyStmt
, InsertPointTy AllocaIP
,
1717 InsertPointTy CodeGenIP
, Twine RegionName
) {
1718 CGBuilderTy
&Builder
= CGF
.Builder
;
1719 Builder
.restoreIP(CodeGenIP
);
1720 llvm::BasicBlock
*FiniBB
= splitBBWithSuffix(Builder
, /*CreateBranch=*/false,
1721 "." + RegionName
+ ".after");
1724 OMPBuilderCBHelpers::OutlinedRegionBodyRAII
IRB(CGF
, AllocaIP
, *FiniBB
);
1725 CGF
.EmitStmt(RegionBodyStmt
);
1728 if (Builder
.saveIP().isSet())
1729 Builder
.CreateBr(FiniBB
);
1732 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective
&S
) {
1733 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
1734 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
1735 // Check if we have any if clause associated with the directive.
1736 llvm::Value
*IfCond
= nullptr;
1737 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
1738 IfCond
= EmitScalarExpr(C
->getCondition(),
1739 /*IgnoreResultAssign=*/true);
1741 llvm::Value
*NumThreads
= nullptr;
1742 if (const auto *NumThreadsClause
= S
.getSingleClause
<OMPNumThreadsClause
>())
1743 NumThreads
= EmitScalarExpr(NumThreadsClause
->getNumThreads(),
1744 /*IgnoreResultAssign=*/true);
1746 ProcBindKind ProcBind
= OMP_PROC_BIND_default
;
1747 if (const auto *ProcBindClause
= S
.getSingleClause
<OMPProcBindClause
>())
1748 ProcBind
= ProcBindClause
->getProcBindKind();
1750 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
1752 // The cleanup callback that finalizes all variabels at the given location,
1753 // thus calls destructors etc.
1754 auto FiniCB
= [this](InsertPointTy IP
) {
1755 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
1758 // Privatization callback that performs appropriate action for
1759 // shared/private/firstprivate/lastprivate/copyin/... variables.
1761 // TODO: This defaults to shared right now.
1762 auto PrivCB
= [](InsertPointTy AllocaIP
, InsertPointTy CodeGenIP
,
1763 llvm::Value
&, llvm::Value
&Val
, llvm::Value
*&ReplVal
) {
1764 // The next line is appropriate only for variables (Val) with the
1765 // data-sharing attribute "shared".
1771 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_parallel
);
1772 const Stmt
*ParallelRegionBodyStmt
= CS
->getCapturedStmt();
1774 auto BodyGenCB
= [&, this](InsertPointTy AllocaIP
,
1775 InsertPointTy CodeGenIP
) {
1776 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1777 *this, ParallelRegionBodyStmt
, AllocaIP
, CodeGenIP
, "parallel");
1780 CGCapturedStmtInfo
CGSI(*CS
, CR_OpenMP
);
1781 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(*this, &CGSI
);
1782 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
1783 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
1785 OMPBuilder
.createParallel(Builder
, AllocaIP
, BodyGenCB
, PrivCB
, FiniCB
,
1786 IfCond
, NumThreads
, ProcBind
, S
.hasCancel()));
1790 // Emit parallel region as a standalone region.
1791 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
1793 OMPPrivateScope
PrivateScope(CGF
);
1794 emitOMPCopyinClause(CGF
, S
);
1795 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
1796 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
1797 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
1798 (void)PrivateScope
.Privatize();
1799 CGF
.EmitStmt(S
.getCapturedStmt(OMPD_parallel
)->getCapturedStmt());
1800 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
1804 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
1805 emitCommonOMPParallelDirective(*this, S
, OMPD_parallel
, CodeGen
,
1806 emitEmptyBoundParameters
);
1807 emitPostUpdateForReductionClause(*this, S
,
1808 [](CodeGenFunction
&) { return nullptr; });
1810 // Check for outer lastprivate conditional update.
1811 checkForLastprivateConditionalUpdate(*this, S
);
1814 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective
&S
) {
1815 EmitStmt(S
.getIfStmt());
1819 /// RAII to handle scopes for loop transformation directives.
1820 class OMPTransformDirectiveScopeRAII
{
1821 OMPLoopScope
*Scope
= nullptr;
1822 CodeGenFunction::CGCapturedStmtInfo
*CGSI
= nullptr;
1823 CodeGenFunction::CGCapturedStmtRAII
*CapInfoRAII
= nullptr;
1826 OMPTransformDirectiveScopeRAII(CodeGenFunction
&CGF
, const Stmt
*S
) {
1827 if (const auto *Dir
= dyn_cast
<OMPLoopBasedDirective
>(S
)) {
1828 Scope
= new OMPLoopScope(CGF
, *Dir
);
1829 CGSI
= new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP
);
1830 CapInfoRAII
= new CodeGenFunction::CGCapturedStmtRAII(CGF
, CGSI
);
1833 ~OMPTransformDirectiveScopeRAII() {
1843 static void emitBody(CodeGenFunction
&CGF
, const Stmt
*S
, const Stmt
*NextLoop
,
1844 int MaxLevel
, int Level
= 0) {
1845 assert(Level
< MaxLevel
&& "Too deep lookup during loop body codegen.");
1846 const Stmt
*SimplifiedS
= S
->IgnoreContainers();
1847 if (const auto *CS
= dyn_cast
<CompoundStmt
>(SimplifiedS
)) {
1848 PrettyStackTraceLoc
CrashInfo(
1849 CGF
.getContext().getSourceManager(), CS
->getLBracLoc(),
1850 "LLVM IR generation of compound statement ('{}')");
1852 // Keep track of the current cleanup stack depth, including debug scopes.
1853 CodeGenFunction::LexicalScope
Scope(CGF
, S
->getSourceRange());
1854 for (const Stmt
*CurStmt
: CS
->body())
1855 emitBody(CGF
, CurStmt
, NextLoop
, MaxLevel
, Level
);
1858 if (SimplifiedS
== NextLoop
) {
1859 if (auto *Dir
= dyn_cast
<OMPLoopTransformationDirective
>(SimplifiedS
))
1860 SimplifiedS
= Dir
->getTransformedStmt();
1861 if (const auto *CanonLoop
= dyn_cast
<OMPCanonicalLoop
>(SimplifiedS
))
1862 SimplifiedS
= CanonLoop
->getLoopStmt();
1863 if (const auto *For
= dyn_cast
<ForStmt
>(SimplifiedS
)) {
1866 assert(isa
<CXXForRangeStmt
>(SimplifiedS
) &&
1867 "Expected canonical for loop or range-based for loop.");
1868 const auto *CXXFor
= cast
<CXXForRangeStmt
>(SimplifiedS
);
1869 CGF
.EmitStmt(CXXFor
->getLoopVarStmt());
1870 S
= CXXFor
->getBody();
1872 if (Level
+ 1 < MaxLevel
) {
1873 NextLoop
= OMPLoopDirective::tryToFindNextInnerLoop(
1874 S
, /*TryImperfectlyNestedLoops=*/true);
1875 emitBody(CGF
, S
, NextLoop
, MaxLevel
, Level
+ 1);
1882 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective
&D
,
1883 JumpDest LoopExit
) {
1884 RunCleanupsScope
BodyScope(*this);
1885 // Update counters values on current iteration.
1886 for (const Expr
*UE
: D
.updates())
1887 EmitIgnoredExpr(UE
);
1888 // Update the linear variables.
1889 // In distribute directives only loop counters may be marked as linear, no
1890 // need to generate the code for them.
1891 if (!isOpenMPDistributeDirective(D
.getDirectiveKind())) {
1892 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
1893 for (const Expr
*UE
: C
->updates())
1894 EmitIgnoredExpr(UE
);
1898 // On a continue in the body, jump to the end.
1899 JumpDest Continue
= getJumpDestInCurrentScope("omp.body.continue");
1900 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
1901 for (const Expr
*E
: D
.finals_conditions()) {
1904 // Check that loop counter in non-rectangular nest fits into the iteration
1906 llvm::BasicBlock
*NextBB
= createBasicBlock("omp.body.next");
1907 EmitBranchOnBoolExpr(E
, NextBB
, Continue
.getBlock(),
1908 getProfileCount(D
.getBody()));
1912 OMPPrivateScope
InscanScope(*this);
1913 EmitOMPReductionClauseInit(D
, InscanScope
, /*ForInscan=*/true);
1914 bool IsInscanRegion
= InscanScope
.Privatize();
1915 if (IsInscanRegion
) {
1916 // Need to remember the block before and after scan directive
1917 // to dispatch them correctly depending on the clause used in
1918 // this directive, inclusive or exclusive. For inclusive scan the natural
1919 // order of the blocks is used, for exclusive clause the blocks must be
1920 // executed in reverse order.
1921 OMPBeforeScanBlock
= createBasicBlock("omp.before.scan.bb");
1922 OMPAfterScanBlock
= createBasicBlock("omp.after.scan.bb");
1923 // No need to allocate inscan exit block, in simd mode it is selected in the
1924 // codegen for the scan directive.
1925 if (D
.getDirectiveKind() != OMPD_simd
&& !getLangOpts().OpenMPSimd
)
1926 OMPScanExitBlock
= createBasicBlock("omp.exit.inscan.bb");
1927 OMPScanDispatch
= createBasicBlock("omp.inscan.dispatch");
1928 EmitBranch(OMPScanDispatch
);
1929 EmitBlock(OMPBeforeScanBlock
);
1932 // Emit loop variables for C++ range loops.
1934 D
.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1936 emitBody(*this, Body
,
1937 OMPLoopBasedDirective::tryToFindNextInnerLoop(
1938 Body
, /*TryImperfectlyNestedLoops=*/true),
1939 D
.getLoopsNumber());
1941 // Jump to the dispatcher at the end of the loop body.
1943 EmitBranch(OMPScanExitBlock
);
1945 // The end (updates/cleanups).
1946 EmitBlock(Continue
.getBlock());
1947 BreakContinueStack
.pop_back();
1950 using EmittedClosureTy
= std::pair
<llvm::Function
*, llvm::Value
*>;
1952 /// Emit a captured statement and return the function as well as its captured
1953 /// closure context.
1954 static EmittedClosureTy
emitCapturedStmtFunc(CodeGenFunction
&ParentCGF
,
1955 const CapturedStmt
*S
) {
1956 LValue CapStruct
= ParentCGF
.InitCapturedStruct(*S
);
1957 CodeGenFunction
CGF(ParentCGF
.CGM
, /*suppressNewContext=*/true);
1958 std::unique_ptr
<CodeGenFunction::CGCapturedStmtInfo
> CSI
=
1959 std::make_unique
<CodeGenFunction::CGCapturedStmtInfo
>(*S
);
1960 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, CSI
.get());
1961 llvm::Function
*F
= CGF
.GenerateCapturedStmtFunction(*S
);
1963 return {F
, CapStruct
.getPointer(ParentCGF
)};
1966 /// Emit a call to a previously captured closure.
1967 static llvm::CallInst
*
1968 emitCapturedStmtCall(CodeGenFunction
&ParentCGF
, EmittedClosureTy Cap
,
1969 llvm::ArrayRef
<llvm::Value
*> Args
) {
1970 // Append the closure context to the argument.
1971 SmallVector
<llvm::Value
*> EffectiveArgs
;
1972 EffectiveArgs
.reserve(Args
.size() + 1);
1973 llvm::append_range(EffectiveArgs
, Args
);
1974 EffectiveArgs
.push_back(Cap
.second
);
1976 return ParentCGF
.Builder
.CreateCall(Cap
.first
, EffectiveArgs
);
1979 llvm::CanonicalLoopInfo
*
1980 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt
*S
, int Depth
) {
1981 assert(Depth
== 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1983 // The caller is processing the loop-associated directive processing the \p
1984 // Depth loops nested in \p S. Put the previous pending loop-associated
1985 // directive to the stack. If the current loop-associated directive is a loop
1986 // transformation directive, it will push its generated loops onto the stack
1987 // such that together with the loops left here they form the combined loop
1988 // nest for the parent loop-associated directive.
1989 int ParentExpectedOMPLoopDepth
= ExpectedOMPLoopDepth
;
1990 ExpectedOMPLoopDepth
= Depth
;
1993 assert(OMPLoopNestStack
.size() >= (size_t)Depth
&& "Found too few loops");
1995 // The last added loop is the outermost one.
1996 llvm::CanonicalLoopInfo
*Result
= OMPLoopNestStack
.back();
1998 // Pop the \p Depth loops requested by the call from that stack and restore
1999 // the previous context.
2000 OMPLoopNestStack
.pop_back_n(Depth
);
2001 ExpectedOMPLoopDepth
= ParentExpectedOMPLoopDepth
;
2006 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop
*S
) {
2007 const Stmt
*SyntacticalLoop
= S
->getLoopStmt();
2008 if (!getLangOpts().OpenMPIRBuilder
) {
2009 // Ignore if OpenMPIRBuilder is not enabled.
2010 EmitStmt(SyntacticalLoop
);
2014 LexicalScope
ForScope(*this, S
->getSourceRange());
2016 // Emit init statements. The Distance/LoopVar funcs may reference variable
2017 // declarations they contain.
2018 const Stmt
*BodyStmt
;
2019 if (const auto *For
= dyn_cast
<ForStmt
>(SyntacticalLoop
)) {
2020 if (const Stmt
*InitStmt
= For
->getInit())
2022 BodyStmt
= For
->getBody();
2023 } else if (const auto *RangeFor
=
2024 dyn_cast
<CXXForRangeStmt
>(SyntacticalLoop
)) {
2025 if (const DeclStmt
*RangeStmt
= RangeFor
->getRangeStmt())
2026 EmitStmt(RangeStmt
);
2027 if (const DeclStmt
*BeginStmt
= RangeFor
->getBeginStmt())
2028 EmitStmt(BeginStmt
);
2029 if (const DeclStmt
*EndStmt
= RangeFor
->getEndStmt())
2031 if (const DeclStmt
*LoopVarStmt
= RangeFor
->getLoopVarStmt())
2032 EmitStmt(LoopVarStmt
);
2033 BodyStmt
= RangeFor
->getBody();
2035 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2037 // Emit closure for later use. By-value captures will be captured here.
2038 const CapturedStmt
*DistanceFunc
= S
->getDistanceFunc();
2039 EmittedClosureTy DistanceClosure
= emitCapturedStmtFunc(*this, DistanceFunc
);
2040 const CapturedStmt
*LoopVarFunc
= S
->getLoopVarFunc();
2041 EmittedClosureTy LoopVarClosure
= emitCapturedStmtFunc(*this, LoopVarFunc
);
2043 // Call the distance function to get the number of iterations of the loop to
2045 QualType LogicalTy
= DistanceFunc
->getCapturedDecl()
2048 .getNonReferenceType();
2049 Address CountAddr
= CreateMemTemp(LogicalTy
, ".count.addr");
2050 emitCapturedStmtCall(*this, DistanceClosure
, {CountAddr
.getPointer()});
2051 llvm::Value
*DistVal
= Builder
.CreateLoad(CountAddr
, ".count");
2053 // Emit the loop structure.
2054 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
2055 auto BodyGen
= [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP
,
2056 llvm::Value
*IndVar
) {
2057 Builder
.restoreIP(CodeGenIP
);
2059 // Emit the loop body: Convert the logical iteration number to the loop
2060 // variable and emit the body.
2061 const DeclRefExpr
*LoopVarRef
= S
->getLoopVarRef();
2062 LValue LCVal
= EmitLValue(LoopVarRef
);
2063 Address LoopVarAddress
= LCVal
.getAddress(*this);
2064 emitCapturedStmtCall(*this, LoopVarClosure
,
2065 {LoopVarAddress
.getPointer(), IndVar
});
2067 RunCleanupsScope
BodyScope(*this);
2070 llvm::CanonicalLoopInfo
*CL
=
2071 OMPBuilder
.createCanonicalLoop(Builder
, BodyGen
, DistVal
);
2073 // Finish up the loop.
2074 Builder
.restoreIP(CL
->getAfterIP());
2075 ForScope
.ForceCleanup();
2077 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2078 OMPLoopNestStack
.push_back(CL
);
2081 void CodeGenFunction::EmitOMPInnerLoop(
2082 const OMPExecutableDirective
&S
, bool RequiresCleanup
, const Expr
*LoopCond
,
2083 const Expr
*IncExpr
,
2084 const llvm::function_ref
<void(CodeGenFunction
&)> BodyGen
,
2085 const llvm::function_ref
<void(CodeGenFunction
&)> PostIncGen
) {
2086 auto LoopExit
= getJumpDestInCurrentScope("omp.inner.for.end");
2088 // Start the loop with a block that tests the condition.
2089 auto CondBlock
= createBasicBlock("omp.inner.for.cond");
2090 EmitBlock(CondBlock
);
2091 const SourceRange R
= S
.getSourceRange();
2093 // If attributes are attached, push to the basic block with them.
2094 const auto &OMPED
= cast
<OMPExecutableDirective
>(S
);
2095 const CapturedStmt
*ICS
= OMPED
.getInnermostCapturedStmt();
2096 const Stmt
*SS
= ICS
->getCapturedStmt();
2097 const AttributedStmt
*AS
= dyn_cast_or_null
<AttributedStmt
>(SS
);
2098 OMPLoopNestStack
.clear();
2100 LoopStack
.push(CondBlock
, CGM
.getContext(), CGM
.getCodeGenOpts(),
2101 AS
->getAttrs(), SourceLocToDebugLoc(R
.getBegin()),
2102 SourceLocToDebugLoc(R
.getEnd()));
2104 LoopStack
.push(CondBlock
, SourceLocToDebugLoc(R
.getBegin()),
2105 SourceLocToDebugLoc(R
.getEnd()));
2107 // If there are any cleanups between here and the loop-exit scope,
2108 // create a block to stage a loop exit along.
2109 llvm::BasicBlock
*ExitBlock
= LoopExit
.getBlock();
2110 if (RequiresCleanup
)
2111 ExitBlock
= createBasicBlock("omp.inner.for.cond.cleanup");
2113 llvm::BasicBlock
*LoopBody
= createBasicBlock("omp.inner.for.body");
2116 EmitBranchOnBoolExpr(LoopCond
, LoopBody
, ExitBlock
, getProfileCount(&S
));
2117 if (ExitBlock
!= LoopExit
.getBlock()) {
2118 EmitBlock(ExitBlock
);
2119 EmitBranchThroughCleanup(LoopExit
);
2122 EmitBlock(LoopBody
);
2123 incrementProfileCounter(&S
);
2125 // Create a block for the increment.
2126 JumpDest Continue
= getJumpDestInCurrentScope("omp.inner.for.inc");
2127 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
2131 // Emit "IV = IV + 1" and a back-edge to the condition block.
2132 EmitBlock(Continue
.getBlock());
2133 EmitIgnoredExpr(IncExpr
);
2135 BreakContinueStack
.pop_back();
2136 EmitBranch(CondBlock
);
2138 // Emit the fall-through block.
2139 EmitBlock(LoopExit
.getBlock());
2142 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective
&D
) {
2143 if (!HaveInsertPoint())
2145 // Emit inits for the linear variables.
2146 bool HasLinears
= false;
2147 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2148 for (const Expr
*Init
: C
->inits()) {
2150 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(Init
)->getDecl());
2151 if (const auto *Ref
=
2152 dyn_cast
<DeclRefExpr
>(VD
->getInit()->IgnoreImpCasts())) {
2153 AutoVarEmission Emission
= EmitAutoVarAlloca(*VD
);
2154 const auto *OrigVD
= cast
<VarDecl
>(Ref
->getDecl());
2155 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
2156 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
2157 VD
->getInit()->getType(), VK_LValue
,
2158 VD
->getInit()->getExprLoc());
2161 MakeAddrLValue(Emission
.getAllocatedAddress(), VD
->getType()),
2162 /*capturedByInit=*/false);
2163 EmitAutoVarCleanups(Emission
);
2168 // Emit the linear steps for the linear clauses.
2169 // If a step is not constant, it is pre-calculated before the loop.
2170 if (const auto *CS
= cast_or_null
<BinaryOperator
>(C
->getCalcStep()))
2171 if (const auto *SaveRef
= cast
<DeclRefExpr
>(CS
->getLHS())) {
2172 EmitVarDecl(*cast
<VarDecl
>(SaveRef
->getDecl()));
2173 // Emit calculation of the linear step.
2174 EmitIgnoredExpr(CS
);
2180 void CodeGenFunction::EmitOMPLinearClauseFinal(
2181 const OMPLoopDirective
&D
,
2182 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
2183 if (!HaveInsertPoint())
2185 llvm::BasicBlock
*DoneBB
= nullptr;
2186 // Emit the final values of the linear variables.
2187 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2188 auto IC
= C
->varlist_begin();
2189 for (const Expr
*F
: C
->finals()) {
2191 if (llvm::Value
*Cond
= CondGen(*this)) {
2192 // If the first post-update expression is found, emit conditional
2193 // block if it was requested.
2194 llvm::BasicBlock
*ThenBB
= createBasicBlock(".omp.linear.pu");
2195 DoneBB
= createBasicBlock(".omp.linear.pu.done");
2196 Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
2200 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IC
)->getDecl());
2201 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
2202 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
2203 (*IC
)->getType(), VK_LValue
, (*IC
)->getExprLoc());
2204 Address OrigAddr
= EmitLValue(&DRE
).getAddress(*this);
2205 CodeGenFunction::OMPPrivateScope
VarScope(*this);
2206 VarScope
.addPrivate(OrigVD
, OrigAddr
);
2207 (void)VarScope
.Privatize();
2211 if (const Expr
*PostUpdate
= C
->getPostUpdateExpr())
2212 EmitIgnoredExpr(PostUpdate
);
2215 EmitBlock(DoneBB
, /*IsFinished=*/true);
2218 static void emitAlignedClause(CodeGenFunction
&CGF
,
2219 const OMPExecutableDirective
&D
) {
2220 if (!CGF
.HaveInsertPoint())
2222 for (const auto *Clause
: D
.getClausesOfKind
<OMPAlignedClause
>()) {
2223 llvm::APInt
ClauseAlignment(64, 0);
2224 if (const Expr
*AlignmentExpr
= Clause
->getAlignment()) {
2226 cast
<llvm::ConstantInt
>(CGF
.EmitScalarExpr(AlignmentExpr
));
2227 ClauseAlignment
= AlignmentCI
->getValue();
2229 for (const Expr
*E
: Clause
->varlists()) {
2230 llvm::APInt
Alignment(ClauseAlignment
);
2231 if (Alignment
== 0) {
2232 // OpenMP [2.8.1, Description]
2233 // If no optional parameter is specified, implementation-defined default
2234 // alignments for SIMD instructions on the target platforms are assumed.
2237 .toCharUnitsFromBits(CGF
.getContext().getOpenMPDefaultSimdAlign(
2238 E
->getType()->getPointeeType()))
2241 assert((Alignment
== 0 || Alignment
.isPowerOf2()) &&
2242 "alignment is not power of 2");
2243 if (Alignment
!= 0) {
2244 llvm::Value
*PtrValue
= CGF
.EmitScalarExpr(E
);
2245 CGF
.emitAlignmentAssumption(
2246 PtrValue
, E
, /*No second loc needed*/ SourceLocation(),
2247 llvm::ConstantInt::get(CGF
.getLLVMContext(), Alignment
));
2253 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2254 const OMPLoopDirective
&S
, CodeGenFunction::OMPPrivateScope
&LoopScope
) {
2255 if (!HaveInsertPoint())
2257 auto I
= S
.private_counters().begin();
2258 for (const Expr
*E
: S
.counters()) {
2259 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2260 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*I
)->getDecl());
2261 // Emit var without initialization.
2262 AutoVarEmission VarEmission
= EmitAutoVarAlloca(*PrivateVD
);
2263 EmitAutoVarCleanups(VarEmission
);
2264 LocalDeclMap
.erase(PrivateVD
);
2265 (void)LoopScope
.addPrivate(VD
, VarEmission
.getAllocatedAddress());
2266 if (LocalDeclMap
.count(VD
) || CapturedStmtInfo
->lookup(VD
) ||
2267 VD
->hasGlobalStorage()) {
2268 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(VD
),
2269 LocalDeclMap
.count(VD
) || CapturedStmtInfo
->lookup(VD
),
2270 E
->getType(), VK_LValue
, E
->getExprLoc());
2271 (void)LoopScope
.addPrivate(PrivateVD
, EmitLValue(&DRE
).getAddress(*this));
2273 (void)LoopScope
.addPrivate(PrivateVD
, VarEmission
.getAllocatedAddress());
2277 // Privatize extra loop counters used in loops for ordered(n) clauses.
2278 for (const auto *C
: S
.getClausesOfKind
<OMPOrderedClause
>()) {
2279 if (!C
->getNumForLoops())
2281 for (unsigned I
= S
.getLoopsNumber(), E
= C
->getLoopNumIterations().size();
2283 const auto *DRE
= cast
<DeclRefExpr
>(C
->getLoopCounter(I
));
2284 const auto *VD
= cast
<VarDecl
>(DRE
->getDecl());
2285 // Override only those variables that can be captured to avoid re-emission
2286 // of the variables declared within the loops.
2287 if (DRE
->refersToEnclosingVariableOrCapture()) {
2288 (void)LoopScope
.addPrivate(
2289 VD
, CreateMemTemp(DRE
->getType(), VD
->getName()));
2295 static void emitPreCond(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2296 const Expr
*Cond
, llvm::BasicBlock
*TrueBlock
,
2297 llvm::BasicBlock
*FalseBlock
, uint64_t TrueCount
) {
2298 if (!CGF
.HaveInsertPoint())
2301 CodeGenFunction::OMPPrivateScope
PreCondScope(CGF
);
2302 CGF
.EmitOMPPrivateLoopCounters(S
, PreCondScope
);
2303 (void)PreCondScope
.Privatize();
2304 // Get initial values of real counters.
2305 for (const Expr
*I
: S
.inits()) {
2306 CGF
.EmitIgnoredExpr(I
);
2309 // Create temp loop control variables with their init values to support
2310 // non-rectangular loops.
2311 CodeGenFunction::OMPMapVars PreCondVars
;
2312 for (const Expr
*E
: S
.dependent_counters()) {
2315 assert(!E
->getType().getNonReferenceType()->isRecordType() &&
2316 "dependent counter must not be an iterator.");
2317 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2318 Address CounterAddr
=
2319 CGF
.CreateMemTemp(VD
->getType().getNonReferenceType());
2320 (void)PreCondVars
.setVarAddr(CGF
, VD
, CounterAddr
);
2322 (void)PreCondVars
.apply(CGF
);
2323 for (const Expr
*E
: S
.dependent_inits()) {
2326 CGF
.EmitIgnoredExpr(E
);
2328 // Check that loop is executed at least one time.
2329 CGF
.EmitBranchOnBoolExpr(Cond
, TrueBlock
, FalseBlock
, TrueCount
);
2330 PreCondVars
.restore(CGF
);
2333 void CodeGenFunction::EmitOMPLinearClause(
2334 const OMPLoopDirective
&D
, CodeGenFunction::OMPPrivateScope
&PrivateScope
) {
2335 if (!HaveInsertPoint())
2337 llvm::DenseSet
<const VarDecl
*> SIMDLCVs
;
2338 if (isOpenMPSimdDirective(D
.getDirectiveKind())) {
2339 const auto *LoopDirective
= cast
<OMPLoopDirective
>(&D
);
2340 for (const Expr
*C
: LoopDirective
->counters()) {
2342 cast
<VarDecl
>(cast
<DeclRefExpr
>(C
)->getDecl())->getCanonicalDecl());
2345 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2346 auto CurPrivate
= C
->privates().begin();
2347 for (const Expr
*E
: C
->varlists()) {
2348 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2349 const auto *PrivateVD
=
2350 cast
<VarDecl
>(cast
<DeclRefExpr
>(*CurPrivate
)->getDecl());
2351 if (!SIMDLCVs
.count(VD
->getCanonicalDecl())) {
2352 // Emit private VarDecl with copy init.
2353 EmitVarDecl(*PrivateVD
);
2355 PrivateScope
.addPrivate(VD
, GetAddrOfLocalVar(PrivateVD
));
2356 assert(IsRegistered
&& "linear var already registered as private");
2357 // Silence the warning about unused variable.
2360 EmitVarDecl(*PrivateVD
);
2367 static void emitSimdlenSafelenClause(CodeGenFunction
&CGF
,
2368 const OMPExecutableDirective
&D
) {
2369 if (!CGF
.HaveInsertPoint())
2371 if (const auto *C
= D
.getSingleClause
<OMPSimdlenClause
>()) {
2372 RValue Len
= CGF
.EmitAnyExpr(C
->getSimdlen(), AggValueSlot::ignored(),
2373 /*ignoreResult=*/true);
2374 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2375 CGF
.LoopStack
.setVectorizeWidth(Val
->getZExtValue());
2376 // In presence of finite 'safelen', it may be unsafe to mark all
2377 // the memory instructions parallel, because loop-carried
2378 // dependences of 'safelen' iterations are possible.
2379 CGF
.LoopStack
.setParallel(!D
.getSingleClause
<OMPSafelenClause
>());
2380 } else if (const auto *C
= D
.getSingleClause
<OMPSafelenClause
>()) {
2381 RValue Len
= CGF
.EmitAnyExpr(C
->getSafelen(), AggValueSlot::ignored(),
2382 /*ignoreResult=*/true);
2383 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2384 CGF
.LoopStack
.setVectorizeWidth(Val
->getZExtValue());
2385 // In presence of finite 'safelen', it may be unsafe to mark all
2386 // the memory instructions parallel, because loop-carried
2387 // dependences of 'safelen' iterations are possible.
2388 CGF
.LoopStack
.setParallel(/*Enable=*/false);
2392 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
&D
) {
2393 // Walk clauses and process safelen/lastprivate.
2394 LoopStack
.setParallel(/*Enable=*/true);
2395 LoopStack
.setVectorizeEnable();
2396 emitSimdlenSafelenClause(*this, D
);
2397 if (const auto *C
= D
.getSingleClause
<OMPOrderClause
>())
2398 if (C
->getKind() == OMPC_ORDER_concurrent
)
2399 LoopStack
.setParallel(/*Enable=*/true);
2400 if ((D
.getDirectiveKind() == OMPD_simd
||
2401 (getLangOpts().OpenMPSimd
&&
2402 isOpenMPSimdDirective(D
.getDirectiveKind()))) &&
2403 llvm::any_of(D
.getClausesOfKind
<OMPReductionClause
>(),
2404 [](const OMPReductionClause
*C
) {
2405 return C
->getModifier() == OMPC_REDUCTION_inscan
;
2407 // Disable parallel access in case of prefix sum.
2408 LoopStack
.setParallel(/*Enable=*/false);
2411 void CodeGenFunction::EmitOMPSimdFinal(
2412 const OMPLoopDirective
&D
,
2413 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
2414 if (!HaveInsertPoint())
2416 llvm::BasicBlock
*DoneBB
= nullptr;
2417 auto IC
= D
.counters().begin();
2418 auto IPC
= D
.private_counters().begin();
2419 for (const Expr
*F
: D
.finals()) {
2420 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>((*IC
))->getDecl());
2421 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>((*IPC
))->getDecl());
2422 const auto *CED
= dyn_cast
<OMPCapturedExprDecl
>(OrigVD
);
2423 if (LocalDeclMap
.count(OrigVD
) || CapturedStmtInfo
->lookup(OrigVD
) ||
2424 OrigVD
->hasGlobalStorage() || CED
) {
2426 if (llvm::Value
*Cond
= CondGen(*this)) {
2427 // If the first post-update expression is found, emit conditional
2428 // block if it was requested.
2429 llvm::BasicBlock
*ThenBB
= createBasicBlock(".omp.final.then");
2430 DoneBB
= createBasicBlock(".omp.final.done");
2431 Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
2435 Address OrigAddr
= Address::invalid();
2438 EmitLValue(CED
->getInit()->IgnoreImpCasts()).getAddress(*this);
2440 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(PrivateVD
),
2441 /*RefersToEnclosingVariableOrCapture=*/false,
2442 (*IPC
)->getType(), VK_LValue
, (*IPC
)->getExprLoc());
2443 OrigAddr
= EmitLValue(&DRE
).getAddress(*this);
2445 OMPPrivateScope
VarScope(*this);
2446 VarScope
.addPrivate(OrigVD
, OrigAddr
);
2447 (void)VarScope
.Privatize();
2454 EmitBlock(DoneBB
, /*IsFinished=*/true);
2457 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction
&CGF
,
2458 const OMPLoopDirective
&S
,
2459 CodeGenFunction::JumpDest LoopExit
) {
2460 CGF
.EmitOMPLoopBody(S
, LoopExit
);
2461 CGF
.EmitStopPoint(&S
);
2464 /// Emit a helper variable and return corresponding lvalue.
2465 static LValue
EmitOMPHelperVar(CodeGenFunction
&CGF
,
2466 const DeclRefExpr
*Helper
) {
2467 auto VDecl
= cast
<VarDecl
>(Helper
->getDecl());
2468 CGF
.EmitVarDecl(*VDecl
);
2469 return CGF
.EmitLValue(Helper
);
2472 static void emitCommonSimdLoop(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2473 const RegionCodeGenTy
&SimdInitGen
,
2474 const RegionCodeGenTy
&BodyCodeGen
) {
2475 auto &&ThenGen
= [&S
, &SimdInitGen
, &BodyCodeGen
](CodeGenFunction
&CGF
,
2476 PrePostActionTy
&) {
2477 CGOpenMPRuntime::NontemporalDeclsRAII
NontemporalsRegion(CGF
.CGM
, S
);
2478 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
2483 auto &&ElseGen
= [&BodyCodeGen
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2484 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
2485 CGF
.LoopStack
.setVectorizeEnable(/*Enable=*/false);
2489 const Expr
*IfCond
= nullptr;
2490 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
2491 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
2492 if (CGF
.getLangOpts().OpenMP
>= 50 &&
2493 (C
->getNameModifier() == OMPD_unknown
||
2494 C
->getNameModifier() == OMPD_simd
)) {
2495 IfCond
= C
->getCondition();
2501 CGF
.CGM
.getOpenMPRuntime().emitIfClause(CGF
, IfCond
, ThenGen
, ElseGen
);
2503 RegionCodeGenTy
ThenRCG(ThenGen
);
2508 static void emitOMPSimdRegion(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2509 PrePostActionTy
&Action
) {
2511 assert(isOpenMPSimdDirective(S
.getDirectiveKind()) &&
2512 "Expected simd directive");
2513 OMPLoopScope
PreInitScope(CGF
, S
);
2515 // for (IV in 0..LastIteration) BODY;
2516 // <Final counter/linear vars updates>;
2519 if (isOpenMPDistributeDirective(S
.getDirectiveKind()) ||
2520 isOpenMPWorksharingDirective(S
.getDirectiveKind()) ||
2521 isOpenMPTaskLoopDirective(S
.getDirectiveKind())) {
2522 (void)EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(S
.getLowerBoundVariable()));
2523 (void)EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(S
.getUpperBoundVariable()));
2526 // Emit: if (PreCond) - begin.
2527 // If the condition constant folds and can be elided, avoid emitting the
2530 llvm::BasicBlock
*ContBlock
= nullptr;
2531 if (CGF
.ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
2535 llvm::BasicBlock
*ThenBlock
= CGF
.createBasicBlock("simd.if.then");
2536 ContBlock
= CGF
.createBasicBlock("simd.if.end");
2537 emitPreCond(CGF
, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
2538 CGF
.getProfileCount(&S
));
2539 CGF
.EmitBlock(ThenBlock
);
2540 CGF
.incrementProfileCounter(&S
);
2543 // Emit the loop iteration variable.
2544 const Expr
*IVExpr
= S
.getIterationVariable();
2545 const auto *IVDecl
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IVExpr
)->getDecl());
2546 CGF
.EmitVarDecl(*IVDecl
);
2547 CGF
.EmitIgnoredExpr(S
.getInit());
2549 // Emit the iterations count variable.
2550 // If it is not a variable, Sema decided to calculate iterations count on
2551 // each iteration (e.g., it is foldable into a constant).
2552 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
2553 CGF
.EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
2554 // Emit calculation of the iterations count.
2555 CGF
.EmitIgnoredExpr(S
.getCalcLastIteration());
2558 emitAlignedClause(CGF
, S
);
2559 (void)CGF
.EmitOMPLinearClauseInit(S
);
2561 CodeGenFunction::OMPPrivateScope
LoopScope(CGF
);
2562 CGF
.EmitOMPPrivateLoopCounters(S
, LoopScope
);
2563 CGF
.EmitOMPLinearClause(S
, LoopScope
);
2564 CGF
.EmitOMPPrivateClause(S
, LoopScope
);
2565 CGF
.EmitOMPReductionClauseInit(S
, LoopScope
);
2566 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(
2567 CGF
, S
, CGF
.EmitLValue(S
.getIterationVariable()));
2568 bool HasLastprivateClause
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
2569 (void)LoopScope
.Privatize();
2570 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
2571 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
2575 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2576 CGF
.EmitOMPSimdInit(S
);
2578 [&S
, &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2579 CGF
.EmitOMPInnerLoop(
2580 S
, LoopScope
.requiresCleanups(), S
.getCond(), S
.getInc(),
2581 [&S
](CodeGenFunction
&CGF
) {
2582 emitOMPLoopBodyWithStopPoint(CGF
, S
,
2583 CodeGenFunction::JumpDest());
2585 [](CodeGenFunction
&) {});
2587 CGF
.EmitOMPSimdFinal(S
, [](CodeGenFunction
&) { return nullptr; });
2588 // Emit final copy of the lastprivate variables at the end of loops.
2589 if (HasLastprivateClause
)
2590 CGF
.EmitOMPLastprivateClauseFinal(S
, /*NoFinals=*/true);
2591 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_simd
);
2592 emitPostUpdateForReductionClause(CGF
, S
,
2593 [](CodeGenFunction
&) { return nullptr; });
2594 LoopScope
.restoreMap();
2595 CGF
.EmitOMPLinearClauseFinal(S
, [](CodeGenFunction
&) { return nullptr; });
2597 // Emit: if (PreCond) - end.
2599 CGF
.EmitBranch(ContBlock
);
2600 CGF
.EmitBlock(ContBlock
, true);
2604 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective
&S
) {
2605 // Check for unsupported clauses
2606 for (OMPClause
*C
: S
.clauses()) {
2607 // Currently only order, simdlen and safelen clauses are supported
2608 if (!(isa
<OMPSimdlenClause
>(C
) || isa
<OMPSafelenClause
>(C
) ||
2609 isa
<OMPOrderClause
>(C
) || isa
<OMPAlignedClause
>(C
)))
2613 // Check if we have a statement with the ordered directive.
2614 // Visit the statement hierarchy to find a compound statement
2615 // with a ordered directive in it.
2616 if (const auto *CanonLoop
= dyn_cast
<OMPCanonicalLoop
>(S
.getRawStmt())) {
2617 if (const Stmt
*SyntacticalLoop
= CanonLoop
->getLoopStmt()) {
2618 for (const Stmt
*SubStmt
: SyntacticalLoop
->children()) {
2621 if (const CompoundStmt
*CS
= dyn_cast
<CompoundStmt
>(SubStmt
)) {
2622 for (const Stmt
*CSSubStmt
: CS
->children()) {
2625 if (isa
<OMPOrderedDirective
>(CSSubStmt
)) {
2635 static llvm::MapVector
<llvm::Value
*, llvm::Value
*>
2636 GetAlignedMapping(const OMPSimdDirective
&S
, CodeGenFunction
&CGF
) {
2637 llvm::MapVector
<llvm::Value
*, llvm::Value
*> AlignedVars
;
2638 for (const auto *Clause
: S
.getClausesOfKind
<OMPAlignedClause
>()) {
2639 llvm::APInt
ClauseAlignment(64, 0);
2640 if (const Expr
*AlignmentExpr
= Clause
->getAlignment()) {
2642 cast
<llvm::ConstantInt
>(CGF
.EmitScalarExpr(AlignmentExpr
));
2643 ClauseAlignment
= AlignmentCI
->getValue();
2645 for (const Expr
*E
: Clause
->varlists()) {
2646 llvm::APInt
Alignment(ClauseAlignment
);
2647 if (Alignment
== 0) {
2648 // OpenMP [2.8.1, Description]
2649 // If no optional parameter is specified, implementation-defined default
2650 // alignments for SIMD instructions on the target platforms are assumed.
2653 .toCharUnitsFromBits(CGF
.getContext().getOpenMPDefaultSimdAlign(
2654 E
->getType()->getPointeeType()))
2657 assert((Alignment
== 0 || Alignment
.isPowerOf2()) &&
2658 "alignment is not power of 2");
2659 llvm::Value
*PtrValue
= CGF
.EmitScalarExpr(E
);
2660 AlignedVars
[PtrValue
] = CGF
.Builder
.getInt64(Alignment
.getSExtValue());
2666 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective
&S
) {
2667 bool UseOMPIRBuilder
=
2668 CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
);
2669 if (UseOMPIRBuilder
) {
2670 auto &&CodeGenIRBuilder
= [this, &S
, UseOMPIRBuilder
](CodeGenFunction
&CGF
,
2671 PrePostActionTy
&) {
2672 // Use the OpenMPIRBuilder if enabled.
2673 if (UseOMPIRBuilder
) {
2674 llvm::MapVector
<llvm::Value
*, llvm::Value
*> AlignedVars
=
2675 GetAlignedMapping(S
, CGF
);
2676 // Emit the associated statement and get its loop representation.
2677 const Stmt
*Inner
= S
.getRawStmt();
2678 llvm::CanonicalLoopInfo
*CLI
=
2679 EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
2681 llvm::OpenMPIRBuilder
&OMPBuilder
=
2682 CGM
.getOpenMPRuntime().getOMPBuilder();
2683 // Add SIMD specific metadata
2684 llvm::ConstantInt
*Simdlen
= nullptr;
2685 if (const auto *C
= S
.getSingleClause
<OMPSimdlenClause
>()) {
2687 this->EmitAnyExpr(C
->getSimdlen(), AggValueSlot::ignored(),
2688 /*ignoreResult=*/true);
2689 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2692 llvm::ConstantInt
*Safelen
= nullptr;
2693 if (const auto *C
= S
.getSingleClause
<OMPSafelenClause
>()) {
2695 this->EmitAnyExpr(C
->getSafelen(), AggValueSlot::ignored(),
2696 /*ignoreResult=*/true);
2697 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2700 llvm::omp::OrderKind Order
= llvm::omp::OrderKind::OMP_ORDER_unknown
;
2701 if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>()) {
2702 if (C
->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent
) {
2703 Order
= llvm::omp::OrderKind::OMP_ORDER_concurrent
;
2706 // Add simd metadata to the collapsed loop. Do not generate
2707 // another loop for if clause. Support for if clause is done earlier.
2708 OMPBuilder
.applySimd(CLI
, AlignedVars
,
2709 /*IfCond*/ nullptr, Order
, Simdlen
, Safelen
);
2715 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
2716 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
2717 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
,
2723 ParentLoopDirectiveForScanRegion
ScanRegion(*this, S
);
2724 OMPFirstScanLoop
= true;
2725 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
2726 emitOMPSimdRegion(CGF
, S
, Action
);
2730 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
2731 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
2732 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
2734 // Check for outer lastprivate conditional update.
2735 checkForLastprivateConditionalUpdate(*this, S
);
2738 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective
&S
) {
2739 // Emit the de-sugared statement.
2740 OMPTransformDirectiveScopeRAII
TileScope(*this, &S
);
2741 EmitStmt(S
.getTransformedStmt());
2744 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective
&S
) {
2745 bool UseOMPIRBuilder
= CGM
.getLangOpts().OpenMPIRBuilder
;
2747 if (UseOMPIRBuilder
) {
2748 auto DL
= SourceLocToDebugLoc(S
.getBeginLoc());
2749 const Stmt
*Inner
= S
.getRawStmt();
2751 // Consume nested loop. Clear the entire remaining loop stack because a
2752 // fully unrolled loop is non-transformable. For partial unrolling the
2753 // generated outer loop is pushed back to the stack.
2754 llvm::CanonicalLoopInfo
*CLI
= EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
2755 OMPLoopNestStack
.clear();
2757 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
2759 bool NeedsUnrolledCLI
= ExpectedOMPLoopDepth
>= 1;
2760 llvm::CanonicalLoopInfo
*UnrolledCLI
= nullptr;
2762 if (S
.hasClausesOfKind
<OMPFullClause
>()) {
2763 assert(ExpectedOMPLoopDepth
== 0);
2764 OMPBuilder
.unrollLoopFull(DL
, CLI
);
2765 } else if (auto *PartialClause
= S
.getSingleClause
<OMPPartialClause
>()) {
2766 uint64_t Factor
= 0;
2767 if (Expr
*FactorExpr
= PartialClause
->getFactor()) {
2768 Factor
= FactorExpr
->EvaluateKnownConstInt(getContext()).getZExtValue();
2769 assert(Factor
>= 1 && "Only positive factors are valid");
2771 OMPBuilder
.unrollLoopPartial(DL
, CLI
, Factor
,
2772 NeedsUnrolledCLI
? &UnrolledCLI
: nullptr);
2774 OMPBuilder
.unrollLoopHeuristic(DL
, CLI
);
2777 assert((!NeedsUnrolledCLI
|| UnrolledCLI
) &&
2778 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2780 OMPLoopNestStack
.push_back(UnrolledCLI
);
2785 // This function is only called if the unrolled loop is not consumed by any
2786 // other loop-associated construct. Such a loop-associated construct will have
2787 // used the transformed AST.
2789 // Set the unroll metadata for the next emitted loop.
2790 LoopStack
.setUnrollState(LoopAttributes::Enable
);
2792 if (S
.hasClausesOfKind
<OMPFullClause
>()) {
2793 LoopStack
.setUnrollState(LoopAttributes::Full
);
2794 } else if (auto *PartialClause
= S
.getSingleClause
<OMPPartialClause
>()) {
2795 if (Expr
*FactorExpr
= PartialClause
->getFactor()) {
2797 FactorExpr
->EvaluateKnownConstInt(getContext()).getZExtValue();
2798 assert(Factor
>= 1 && "Only positive factors are valid");
2799 LoopStack
.setUnrollCount(Factor
);
2803 EmitStmt(S
.getAssociatedStmt());
2806 void CodeGenFunction::EmitOMPOuterLoop(
2807 bool DynamicOrOrdered
, bool IsMonotonic
, const OMPLoopDirective
&S
,
2808 CodeGenFunction::OMPPrivateScope
&LoopScope
,
2809 const CodeGenFunction::OMPLoopArguments
&LoopArgs
,
2810 const CodeGenFunction::CodeGenLoopTy
&CodeGenLoop
,
2811 const CodeGenFunction::CodeGenOrderedTy
&CodeGenOrdered
) {
2812 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
2814 const Expr
*IVExpr
= S
.getIterationVariable();
2815 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
2816 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
2818 JumpDest LoopExit
= getJumpDestInCurrentScope("omp.dispatch.end");
2820 // Start the loop with a block that tests the condition.
2821 llvm::BasicBlock
*CondBlock
= createBasicBlock("omp.dispatch.cond");
2822 EmitBlock(CondBlock
);
2823 const SourceRange R
= S
.getSourceRange();
2824 OMPLoopNestStack
.clear();
2825 LoopStack
.push(CondBlock
, SourceLocToDebugLoc(R
.getBegin()),
2826 SourceLocToDebugLoc(R
.getEnd()));
2828 llvm::Value
*BoolCondVal
= nullptr;
2829 if (!DynamicOrOrdered
) {
2830 // UB = min(UB, GlobalUB) or
2831 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2832 // 'distribute parallel for')
2833 EmitIgnoredExpr(LoopArgs
.EUB
);
2835 EmitIgnoredExpr(LoopArgs
.Init
);
2837 BoolCondVal
= EvaluateExprAsBool(LoopArgs
.Cond
);
2840 RT
.emitForNext(*this, S
.getBeginLoc(), IVSize
, IVSigned
, LoopArgs
.IL
,
2841 LoopArgs
.LB
, LoopArgs
.UB
, LoopArgs
.ST
);
2844 // If there are any cleanups between here and the loop-exit scope,
2845 // create a block to stage a loop exit along.
2846 llvm::BasicBlock
*ExitBlock
= LoopExit
.getBlock();
2847 if (LoopScope
.requiresCleanups())
2848 ExitBlock
= createBasicBlock("omp.dispatch.cleanup");
2850 llvm::BasicBlock
*LoopBody
= createBasicBlock("omp.dispatch.body");
2851 Builder
.CreateCondBr(BoolCondVal
, LoopBody
, ExitBlock
);
2852 if (ExitBlock
!= LoopExit
.getBlock()) {
2853 EmitBlock(ExitBlock
);
2854 EmitBranchThroughCleanup(LoopExit
);
2856 EmitBlock(LoopBody
);
2858 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2859 // LB for loop condition and emitted it above).
2860 if (DynamicOrOrdered
)
2861 EmitIgnoredExpr(LoopArgs
.Init
);
2863 // Create a block for the increment.
2864 JumpDest Continue
= getJumpDestInCurrentScope("omp.dispatch.inc");
2865 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
2869 [&S
, IsMonotonic
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2870 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2871 // with dynamic/guided scheduling and without ordered clause.
2872 if (!isOpenMPSimdDirective(S
.getDirectiveKind())) {
2873 CGF
.LoopStack
.setParallel(!IsMonotonic
);
2874 if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>())
2875 if (C
->getKind() == OMPC_ORDER_concurrent
)
2876 CGF
.LoopStack
.setParallel(/*Enable=*/true);
2878 CGF
.EmitOMPSimdInit(S
);
2881 [&S
, &LoopArgs
, LoopExit
, &CodeGenLoop
, IVSize
, IVSigned
, &CodeGenOrdered
,
2882 &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2883 SourceLocation Loc
= S
.getBeginLoc();
2884 // when 'distribute' is not combined with a 'for':
2885 // while (idx <= UB) { BODY; ++idx; }
2886 // when 'distribute' is combined with a 'for'
2887 // (e.g. 'distribute parallel for')
2888 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2889 CGF
.EmitOMPInnerLoop(
2890 S
, LoopScope
.requiresCleanups(), LoopArgs
.Cond
, LoopArgs
.IncExpr
,
2891 [&S
, LoopExit
, &CodeGenLoop
](CodeGenFunction
&CGF
) {
2892 CodeGenLoop(CGF
, S
, LoopExit
);
2894 [IVSize
, IVSigned
, Loc
, &CodeGenOrdered
](CodeGenFunction
&CGF
) {
2895 CodeGenOrdered(CGF
, Loc
, IVSize
, IVSigned
);
2899 EmitBlock(Continue
.getBlock());
2900 BreakContinueStack
.pop_back();
2901 if (!DynamicOrOrdered
) {
2902 // Emit "LB = LB + Stride", "UB = UB + Stride".
2903 EmitIgnoredExpr(LoopArgs
.NextLB
);
2904 EmitIgnoredExpr(LoopArgs
.NextUB
);
2907 EmitBranch(CondBlock
);
2908 OMPLoopNestStack
.clear();
2910 // Emit the fall-through block.
2911 EmitBlock(LoopExit
.getBlock());
2913 // Tell the runtime we are done.
2914 auto &&CodeGen
= [DynamicOrOrdered
, &S
](CodeGenFunction
&CGF
) {
2915 if (!DynamicOrOrdered
)
2916 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
2917 S
.getDirectiveKind());
2919 OMPCancelStack
.emitExit(*this, S
.getDirectiveKind(), CodeGen
);
2922 void CodeGenFunction::EmitOMPForOuterLoop(
2923 const OpenMPScheduleTy
&ScheduleKind
, bool IsMonotonic
,
2924 const OMPLoopDirective
&S
, OMPPrivateScope
&LoopScope
, bool Ordered
,
2925 const OMPLoopArguments
&LoopArgs
,
2926 const CodeGenDispatchBoundsTy
&CGDispatchBounds
) {
2927 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
2929 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2930 const bool DynamicOrOrdered
= Ordered
|| RT
.isDynamic(ScheduleKind
.Schedule
);
2932 assert((Ordered
|| !RT
.isStaticNonchunked(ScheduleKind
.Schedule
,
2933 LoopArgs
.Chunk
!= nullptr)) &&
2934 "static non-chunked schedule does not need outer loop");
2938 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2939 // When schedule(dynamic,chunk_size) is specified, the iterations are
2940 // distributed to threads in the team in chunks as the threads request them.
2941 // Each thread executes a chunk of iterations, then requests another chunk,
2942 // until no chunks remain to be distributed. Each chunk contains chunk_size
2943 // iterations, except for the last chunk to be distributed, which may have
2944 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2946 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2947 // to threads in the team in chunks as the executing threads request them.
2948 // Each thread executes a chunk of iterations, then requests another chunk,
2949 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2950 // each chunk is proportional to the number of unassigned iterations divided
2951 // by the number of threads in the team, decreasing to 1. For a chunk_size
2952 // with value k (greater than 1), the size of each chunk is determined in the
2953 // same way, with the restriction that the chunks do not contain fewer than k
2954 // iterations (except for the last chunk to be assigned, which may have fewer
2955 // than k iterations).
2957 // When schedule(auto) is specified, the decision regarding scheduling is
2958 // delegated to the compiler and/or runtime system. The programmer gives the
2959 // implementation the freedom to choose any possible mapping of iterations to
2960 // threads in the team.
2962 // When schedule(runtime) is specified, the decision regarding scheduling is
2963 // deferred until run time, and the schedule and chunk size are taken from the
2964 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2965 // implementation defined
2967 // while(__kmpc_dispatch_next(&LB, &UB)) {
2969 // while (idx <= UB) { BODY; ++idx;
2970 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2974 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2975 // When schedule(static, chunk_size) is specified, iterations are divided into
2976 // chunks of size chunk_size, and the chunks are assigned to the threads in
2977 // the team in a round-robin fashion in the order of the thread number.
2979 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2980 // while (idx <= UB) { BODY; ++idx; } // inner loop
2986 const Expr
*IVExpr
= S
.getIterationVariable();
2987 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
2988 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
2990 if (DynamicOrOrdered
) {
2991 const std::pair
<llvm::Value
*, llvm::Value
*> DispatchBounds
=
2992 CGDispatchBounds(*this, S
, LoopArgs
.LB
, LoopArgs
.UB
);
2993 llvm::Value
*LBVal
= DispatchBounds
.first
;
2994 llvm::Value
*UBVal
= DispatchBounds
.second
;
2995 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues
= {LBVal
, UBVal
,
2997 RT
.emitForDispatchInit(*this, S
.getBeginLoc(), ScheduleKind
, IVSize
,
2998 IVSigned
, Ordered
, DipatchRTInputValues
);
3000 CGOpenMPRuntime::StaticRTInput
StaticInit(
3001 IVSize
, IVSigned
, Ordered
, LoopArgs
.IL
, LoopArgs
.LB
, LoopArgs
.UB
,
3002 LoopArgs
.ST
, LoopArgs
.Chunk
);
3003 RT
.emitForStaticInit(*this, S
.getBeginLoc(), S
.getDirectiveKind(),
3004 ScheduleKind
, StaticInit
);
3007 auto &&CodeGenOrdered
= [Ordered
](CodeGenFunction
&CGF
, SourceLocation Loc
,
3008 const unsigned IVSize
,
3009 const bool IVSigned
) {
3011 CGF
.CGM
.getOpenMPRuntime().emitForOrderedIterationEnd(CGF
, Loc
, IVSize
,
3016 OMPLoopArguments
OuterLoopArgs(LoopArgs
.LB
, LoopArgs
.UB
, LoopArgs
.ST
,
3017 LoopArgs
.IL
, LoopArgs
.Chunk
, LoopArgs
.EUB
);
3018 OuterLoopArgs
.IncExpr
= S
.getInc();
3019 OuterLoopArgs
.Init
= S
.getInit();
3020 OuterLoopArgs
.Cond
= S
.getCond();
3021 OuterLoopArgs
.NextLB
= S
.getNextLowerBound();
3022 OuterLoopArgs
.NextUB
= S
.getNextUpperBound();
3023 EmitOMPOuterLoop(DynamicOrOrdered
, IsMonotonic
, S
, LoopScope
, OuterLoopArgs
,
3024 emitOMPLoopBodyWithStopPoint
, CodeGenOrdered
);
3027 static void emitEmptyOrdered(CodeGenFunction
&, SourceLocation Loc
,
3028 const unsigned IVSize
, const bool IVSigned
) {}
3030 void CodeGenFunction::EmitOMPDistributeOuterLoop(
3031 OpenMPDistScheduleClauseKind ScheduleKind
, const OMPLoopDirective
&S
,
3032 OMPPrivateScope
&LoopScope
, const OMPLoopArguments
&LoopArgs
,
3033 const CodeGenLoopTy
&CodeGenLoopContent
) {
3035 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
3038 // Same behavior as a OMPForOuterLoop, except that schedule cannot be
3042 const Expr
*IVExpr
= S
.getIterationVariable();
3043 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
3044 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
3046 CGOpenMPRuntime::StaticRTInput
StaticInit(
3047 IVSize
, IVSigned
, /* Ordered = */ false, LoopArgs
.IL
, LoopArgs
.LB
,
3048 LoopArgs
.UB
, LoopArgs
.ST
, LoopArgs
.Chunk
);
3049 RT
.emitDistributeStaticInit(*this, S
.getBeginLoc(), ScheduleKind
, StaticInit
);
3051 // for combined 'distribute' and 'for' the increment expression of distribute
3052 // is stored in DistInc. For 'distribute' alone, it is in Inc.
3054 if (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind()))
3055 IncExpr
= S
.getDistInc();
3057 IncExpr
= S
.getInc();
3059 // this routine is shared by 'omp distribute parallel for' and
3060 // 'omp distribute': select the right EUB expression depending on the
3062 OMPLoopArguments OuterLoopArgs
;
3063 OuterLoopArgs
.LB
= LoopArgs
.LB
;
3064 OuterLoopArgs
.UB
= LoopArgs
.UB
;
3065 OuterLoopArgs
.ST
= LoopArgs
.ST
;
3066 OuterLoopArgs
.IL
= LoopArgs
.IL
;
3067 OuterLoopArgs
.Chunk
= LoopArgs
.Chunk
;
3068 OuterLoopArgs
.EUB
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3069 ? S
.getCombinedEnsureUpperBound()
3070 : S
.getEnsureUpperBound();
3071 OuterLoopArgs
.IncExpr
= IncExpr
;
3072 OuterLoopArgs
.Init
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3073 ? S
.getCombinedInit()
3075 OuterLoopArgs
.Cond
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3076 ? S
.getCombinedCond()
3078 OuterLoopArgs
.NextLB
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3079 ? S
.getCombinedNextLowerBound()
3080 : S
.getNextLowerBound();
3081 OuterLoopArgs
.NextUB
= isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
3082 ? S
.getCombinedNextUpperBound()
3083 : S
.getNextUpperBound();
3085 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S
,
3086 LoopScope
, OuterLoopArgs
, CodeGenLoopContent
,
3090 static std::pair
<LValue
, LValue
>
3091 emitDistributeParallelForInnerBounds(CodeGenFunction
&CGF
,
3092 const OMPExecutableDirective
&S
) {
3093 const OMPLoopDirective
&LS
= cast
<OMPLoopDirective
>(S
);
3095 EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(LS
.getLowerBoundVariable()));
3097 EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(LS
.getUpperBoundVariable()));
3099 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3100 // parallel for') we need to use the 'distribute'
3101 // chunk lower and upper bounds rather than the whole loop iteration
3102 // space. These are parameters to the outlined function for 'parallel'
3103 // and we copy the bounds of the previous schedule into the
3104 // the current ones.
3105 LValue PrevLB
= CGF
.EmitLValue(LS
.getPrevLowerBoundVariable());
3106 LValue PrevUB
= CGF
.EmitLValue(LS
.getPrevUpperBoundVariable());
3107 llvm::Value
*PrevLBVal
= CGF
.EmitLoadOfScalar(
3108 PrevLB
, LS
.getPrevLowerBoundVariable()->getExprLoc());
3109 PrevLBVal
= CGF
.EmitScalarConversion(
3110 PrevLBVal
, LS
.getPrevLowerBoundVariable()->getType(),
3111 LS
.getIterationVariable()->getType(),
3112 LS
.getPrevLowerBoundVariable()->getExprLoc());
3113 llvm::Value
*PrevUBVal
= CGF
.EmitLoadOfScalar(
3114 PrevUB
, LS
.getPrevUpperBoundVariable()->getExprLoc());
3115 PrevUBVal
= CGF
.EmitScalarConversion(
3116 PrevUBVal
, LS
.getPrevUpperBoundVariable()->getType(),
3117 LS
.getIterationVariable()->getType(),
3118 LS
.getPrevUpperBoundVariable()->getExprLoc());
3120 CGF
.EmitStoreOfScalar(PrevLBVal
, LB
);
3121 CGF
.EmitStoreOfScalar(PrevUBVal
, UB
);
3126 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3127 /// we need to use the LB and UB expressions generated by the worksharing
3128 /// code generation support, whereas in non combined situations we would
3129 /// just emit 0 and the LastIteration expression
3130 /// This function is necessary due to the difference of the LB and UB
3131 /// types for the RT emission routines for 'for_static_init' and
3132 /// 'for_dispatch_init'
3133 static std::pair
<llvm::Value
*, llvm::Value
*>
3134 emitDistributeParallelForDispatchBounds(CodeGenFunction
&CGF
,
3135 const OMPExecutableDirective
&S
,
3136 Address LB
, Address UB
) {
3137 const OMPLoopDirective
&LS
= cast
<OMPLoopDirective
>(S
);
3138 const Expr
*IVExpr
= LS
.getIterationVariable();
3139 // when implementing a dynamic schedule for a 'for' combined with a
3140 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3141 // is not normalized as each team only executes its own assigned
3143 QualType IteratorTy
= IVExpr
->getType();
3144 llvm::Value
*LBVal
=
3145 CGF
.EmitLoadOfScalar(LB
, /*Volatile=*/false, IteratorTy
, S
.getBeginLoc());
3146 llvm::Value
*UBVal
=
3147 CGF
.EmitLoadOfScalar(UB
, /*Volatile=*/false, IteratorTy
, S
.getBeginLoc());
3148 return {LBVal
, UBVal
};
3151 static void emitDistributeParallelForDistributeInnerBoundParams(
3152 CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
,
3153 llvm::SmallVectorImpl
<llvm::Value
*> &CapturedVars
) {
3154 const auto &Dir
= cast
<OMPLoopDirective
>(S
);
3156 CGF
.EmitLValue(cast
<DeclRefExpr
>(Dir
.getCombinedLowerBoundVariable()));
3157 llvm::Value
*LBCast
=
3158 CGF
.Builder
.CreateIntCast(CGF
.Builder
.CreateLoad(LB
.getAddress(CGF
)),
3159 CGF
.SizeTy
, /*isSigned=*/false);
3160 CapturedVars
.push_back(LBCast
);
3162 CGF
.EmitLValue(cast
<DeclRefExpr
>(Dir
.getCombinedUpperBoundVariable()));
3164 llvm::Value
*UBCast
=
3165 CGF
.Builder
.CreateIntCast(CGF
.Builder
.CreateLoad(UB
.getAddress(CGF
)),
3166 CGF
.SizeTy
, /*isSigned=*/false);
3167 CapturedVars
.push_back(UBCast
);
3171 emitInnerParallelForWhenCombined(CodeGenFunction
&CGF
,
3172 const OMPLoopDirective
&S
,
3173 CodeGenFunction::JumpDest LoopExit
) {
3174 auto &&CGInlinedWorksharingLoop
= [&S
](CodeGenFunction
&CGF
,
3175 PrePostActionTy
&Action
) {
3177 bool HasCancel
= false;
3178 if (!isOpenMPSimdDirective(S
.getDirectiveKind())) {
3179 if (const auto *D
= dyn_cast
<OMPTeamsDistributeParallelForDirective
>(&S
))
3180 HasCancel
= D
->hasCancel();
3181 else if (const auto *D
= dyn_cast
<OMPDistributeParallelForDirective
>(&S
))
3182 HasCancel
= D
->hasCancel();
3183 else if (const auto *D
=
3184 dyn_cast
<OMPTargetTeamsDistributeParallelForDirective
>(&S
))
3185 HasCancel
= D
->hasCancel();
3187 CodeGenFunction::OMPCancelStackRAII
CancelRegion(CGF
, S
.getDirectiveKind(),
3189 CGF
.EmitOMPWorksharingLoop(S
, S
.getPrevEnsureUpperBound(),
3190 emitDistributeParallelForInnerBounds
,
3191 emitDistributeParallelForDispatchBounds
);
3194 emitCommonOMPParallelDirective(
3196 isOpenMPSimdDirective(S
.getDirectiveKind()) ? OMPD_for_simd
: OMPD_for
,
3197 CGInlinedWorksharingLoop
,
3198 emitDistributeParallelForDistributeInnerBoundParams
);
3201 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3202 const OMPDistributeParallelForDirective
&S
) {
3203 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3204 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
3207 OMPLexicalScope
Scope(*this, S
, OMPD_parallel
);
3208 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute
, CodeGen
);
3211 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3212 const OMPDistributeParallelForSimdDirective
&S
) {
3213 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3214 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
3217 OMPLexicalScope
Scope(*this, S
, OMPD_parallel
);
3218 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute
, CodeGen
);
3221 void CodeGenFunction::EmitOMPDistributeSimdDirective(
3222 const OMPDistributeSimdDirective
&S
) {
3223 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3224 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
3226 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3227 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
3230 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3231 CodeGenModule
&CGM
, StringRef ParentName
, const OMPTargetSimdDirective
&S
) {
3232 // Emit SPMD target parallel for region as a standalone region.
3233 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
3234 emitOMPSimdRegion(CGF
, S
, Action
);
3237 llvm::Constant
*Addr
;
3238 // Emit target region as a standalone region.
3239 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
3240 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
3241 assert(Fn
&& Addr
&& "Target device function emission failed.");
3244 void CodeGenFunction::EmitOMPTargetSimdDirective(
3245 const OMPTargetSimdDirective
&S
) {
3246 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
3247 emitOMPSimdRegion(CGF
, S
, Action
);
3249 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
3253 struct ScheduleKindModifiersTy
{
3254 OpenMPScheduleClauseKind Kind
;
3255 OpenMPScheduleClauseModifier M1
;
3256 OpenMPScheduleClauseModifier M2
;
3257 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind
,
3258 OpenMPScheduleClauseModifier M1
,
3259 OpenMPScheduleClauseModifier M2
)
3260 : Kind(Kind
), M1(M1
), M2(M2
) {}
3264 bool CodeGenFunction::EmitOMPWorksharingLoop(
3265 const OMPLoopDirective
&S
, Expr
*EUB
,
3266 const CodeGenLoopBoundsTy
&CodeGenLoopBounds
,
3267 const CodeGenDispatchBoundsTy
&CGDispatchBounds
) {
3268 // Emit the loop iteration variable.
3269 const auto *IVExpr
= cast
<DeclRefExpr
>(S
.getIterationVariable());
3270 const auto *IVDecl
= cast
<VarDecl
>(IVExpr
->getDecl());
3271 EmitVarDecl(*IVDecl
);
3273 // Emit the iterations count variable.
3274 // If it is not a variable, Sema decided to calculate iterations count on each
3275 // iteration (e.g., it is foldable into a constant).
3276 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
3277 EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
3278 // Emit calculation of the iterations count.
3279 EmitIgnoredExpr(S
.getCalcLastIteration());
3282 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
3284 bool HasLastprivateClause
;
3285 // Check pre-condition.
3287 OMPLoopScope
PreInitScope(*this, S
);
3288 // Skip the entire loop if we don't meet the precondition.
3289 // If the condition constant folds and can be elided, avoid emitting the
3292 llvm::BasicBlock
*ContBlock
= nullptr;
3293 if (ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
3297 llvm::BasicBlock
*ThenBlock
= createBasicBlock("omp.precond.then");
3298 ContBlock
= createBasicBlock("omp.precond.end");
3299 emitPreCond(*this, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
3300 getProfileCount(&S
));
3301 EmitBlock(ThenBlock
);
3302 incrementProfileCounter(&S
);
3305 RunCleanupsScope
DoacrossCleanupScope(*this);
3306 bool Ordered
= false;
3307 if (const auto *OrderedClause
= S
.getSingleClause
<OMPOrderedClause
>()) {
3308 if (OrderedClause
->getNumForLoops())
3309 RT
.emitDoacrossInit(*this, S
, OrderedClause
->getLoopNumIterations());
3314 llvm::DenseSet
<const Expr
*> EmittedFinals
;
3315 emitAlignedClause(*this, S
);
3316 bool HasLinears
= EmitOMPLinearClauseInit(S
);
3317 // Emit helper vars inits.
3319 std::pair
<LValue
, LValue
> Bounds
= CodeGenLoopBounds(*this, S
);
3320 LValue LB
= Bounds
.first
;
3321 LValue UB
= Bounds
.second
;
3323 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getStrideVariable()));
3325 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()));
3327 // Emit 'then' code.
3329 OMPPrivateScope
LoopScope(*this);
3330 if (EmitOMPFirstprivateClause(S
, LoopScope
) || HasLinears
) {
3331 // Emit implicit barrier to synchronize threads and avoid data races on
3332 // initialization of firstprivate variables and post-update of
3333 // lastprivate variables.
3334 CGM
.getOpenMPRuntime().emitBarrierCall(
3335 *this, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
3336 /*ForceSimpleCall=*/true);
3338 EmitOMPPrivateClause(S
, LoopScope
);
3339 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(
3340 *this, S
, EmitLValue(S
.getIterationVariable()));
3341 HasLastprivateClause
= EmitOMPLastprivateClauseInit(S
, LoopScope
);
3342 EmitOMPReductionClauseInit(S
, LoopScope
);
3343 EmitOMPPrivateLoopCounters(S
, LoopScope
);
3344 EmitOMPLinearClause(S
, LoopScope
);
3345 (void)LoopScope
.Privatize();
3346 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
3347 CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S
);
3349 // Detect the loop schedule kind and chunk.
3350 const Expr
*ChunkExpr
= nullptr;
3351 OpenMPScheduleTy ScheduleKind
;
3352 if (const auto *C
= S
.getSingleClause
<OMPScheduleClause
>()) {
3353 ScheduleKind
.Schedule
= C
->getScheduleKind();
3354 ScheduleKind
.M1
= C
->getFirstScheduleModifier();
3355 ScheduleKind
.M2
= C
->getSecondScheduleModifier();
3356 ChunkExpr
= C
->getChunkSize();
3358 // Default behaviour for schedule clause.
3359 CGM
.getOpenMPRuntime().getDefaultScheduleAndChunk(
3360 *this, S
, ScheduleKind
.Schedule
, ChunkExpr
);
3362 bool HasChunkSizeOne
= false;
3363 llvm::Value
*Chunk
= nullptr;
3365 Chunk
= EmitScalarExpr(ChunkExpr
);
3366 Chunk
= EmitScalarConversion(Chunk
, ChunkExpr
->getType(),
3367 S
.getIterationVariable()->getType(),
3369 Expr::EvalResult Result
;
3370 if (ChunkExpr
->EvaluateAsInt(Result
, getContext())) {
3371 llvm::APSInt EvaluatedChunk
= Result
.Val
.getInt();
3372 HasChunkSizeOne
= (EvaluatedChunk
.getLimitedValue() == 1);
3375 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
3376 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
3377 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3378 // If the static schedule kind is specified or if the ordered clause is
3379 // specified, and if no monotonic modifier is specified, the effect will
3380 // be as if the monotonic modifier was specified.
3381 bool StaticChunkedOne
=
3382 RT
.isStaticChunked(ScheduleKind
.Schedule
,
3383 /* Chunked */ Chunk
!= nullptr) &&
3385 isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind());
3388 (ScheduleKind
.Schedule
== OMPC_SCHEDULE_static
&&
3389 !(ScheduleKind
.M1
== OMPC_SCHEDULE_MODIFIER_nonmonotonic
||
3390 ScheduleKind
.M2
== OMPC_SCHEDULE_MODIFIER_nonmonotonic
)) ||
3391 ScheduleKind
.M1
== OMPC_SCHEDULE_MODIFIER_monotonic
||
3392 ScheduleKind
.M2
== OMPC_SCHEDULE_MODIFIER_monotonic
;
3393 if ((RT
.isStaticNonchunked(ScheduleKind
.Schedule
,
3394 /* Chunked */ Chunk
!= nullptr) ||
3395 StaticChunkedOne
) &&
3398 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3401 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3402 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
3403 CGF
.EmitOMPSimdInit(S
);
3404 } else if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>()) {
3405 if (C
->getKind() == OMPC_ORDER_concurrent
)
3406 CGF
.LoopStack
.setParallel(/*Enable=*/true);
3409 [IVSize
, IVSigned
, Ordered
, IL
, LB
, UB
, ST
, StaticChunkedOne
, Chunk
,
3410 &S
, ScheduleKind
, LoopExit
,
3411 &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3412 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3413 // When no chunk_size is specified, the iteration space is divided
3414 // into chunks that are approximately equal in size, and at most
3415 // one chunk is distributed to each thread. Note that the size of
3416 // the chunks is unspecified in this case.
3417 CGOpenMPRuntime::StaticRTInput
StaticInit(
3418 IVSize
, IVSigned
, Ordered
, IL
.getAddress(CGF
),
3419 LB
.getAddress(CGF
), UB
.getAddress(CGF
), ST
.getAddress(CGF
),
3420 StaticChunkedOne
? Chunk
: nullptr);
3421 CGF
.CGM
.getOpenMPRuntime().emitForStaticInit(
3422 CGF
, S
.getBeginLoc(), S
.getDirectiveKind(), ScheduleKind
,
3424 // UB = min(UB, GlobalUB);
3425 if (!StaticChunkedOne
)
3426 CGF
.EmitIgnoredExpr(S
.getEnsureUpperBound());
3428 CGF
.EmitIgnoredExpr(S
.getInit());
3429 // For unchunked static schedule generate:
3431 // while (idx <= UB) {
3436 // For static schedule with chunk one:
3438 // while (IV <= PrevUB) {
3442 CGF
.EmitOMPInnerLoop(
3443 S
, LoopScope
.requiresCleanups(),
3444 StaticChunkedOne
? S
.getCombinedParForInDistCond()
3446 StaticChunkedOne
? S
.getDistInc() : S
.getInc(),
3447 [&S
, LoopExit
](CodeGenFunction
&CGF
) {
3448 emitOMPLoopBodyWithStopPoint(CGF
, S
, LoopExit
);
3450 [](CodeGenFunction
&) {});
3452 EmitBlock(LoopExit
.getBlock());
3453 // Tell the runtime we are done.
3454 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
) {
3455 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
3456 S
.getDirectiveKind());
3458 OMPCancelStack
.emitExit(*this, S
.getDirectiveKind(), CodeGen
);
3460 // Emit the outer loop, which requests its work chunk [LB..UB] from
3461 // runtime and runs the inner loop to process it.
3462 const OMPLoopArguments
LoopArguments(
3463 LB
.getAddress(*this), UB
.getAddress(*this), ST
.getAddress(*this),
3464 IL
.getAddress(*this), Chunk
, EUB
);
3465 EmitOMPForOuterLoop(ScheduleKind
, IsMonotonic
, S
, LoopScope
, Ordered
,
3466 LoopArguments
, CGDispatchBounds
);
3468 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
3469 EmitOMPSimdFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3470 return CGF
.Builder
.CreateIsNotNull(
3471 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3474 EmitOMPReductionClauseFinal(
3475 S
, /*ReductionKind=*/isOpenMPSimdDirective(S
.getDirectiveKind())
3476 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3477 : /*Parallel only*/ OMPD_parallel
);
3478 // Emit post-update of the reduction variables if IsLastIter != 0.
3479 emitPostUpdateForReductionClause(
3480 *this, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3481 return CGF
.Builder
.CreateIsNotNull(
3482 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3484 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3485 if (HasLastprivateClause
)
3486 EmitOMPLastprivateClauseFinal(
3487 S
, isOpenMPSimdDirective(S
.getDirectiveKind()),
3488 Builder
.CreateIsNotNull(EmitLoadOfScalar(IL
, S
.getBeginLoc())));
3489 LoopScope
.restoreMap();
3490 EmitOMPLinearClauseFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3491 return CGF
.Builder
.CreateIsNotNull(
3492 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3495 DoacrossCleanupScope
.ForceCleanup();
3496 // We're now done with the loop, so jump to the continuation block.
3498 EmitBranch(ContBlock
);
3499 EmitBlock(ContBlock
, /*IsFinished=*/true);
3502 return HasLastprivateClause
;
3505 /// The following two functions generate expressions for the loop lower
3506 /// and upper bounds in case of static and dynamic (dispatch) schedule
3507 /// of the associated 'for' or 'distribute' loop.
3508 static std::pair
<LValue
, LValue
>
3509 emitForLoopBounds(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
3510 const auto &LS
= cast
<OMPLoopDirective
>(S
);
3512 EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(LS
.getLowerBoundVariable()));
3514 EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(LS
.getUpperBoundVariable()));
3518 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3519 /// consider the lower and upper bound expressions generated by the
3520 /// worksharing loop support, but we use 0 and the iteration space size as
3522 static std::pair
<llvm::Value
*, llvm::Value
*>
3523 emitDispatchForLoopBounds(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
,
3524 Address LB
, Address UB
) {
3525 const auto &LS
= cast
<OMPLoopDirective
>(S
);
3526 const Expr
*IVExpr
= LS
.getIterationVariable();
3527 const unsigned IVSize
= CGF
.getContext().getTypeSize(IVExpr
->getType());
3528 llvm::Value
*LBVal
= CGF
.Builder
.getIntN(IVSize
, 0);
3529 llvm::Value
*UBVal
= CGF
.EmitScalarExpr(LS
.getLastIteration());
3530 return {LBVal
, UBVal
};
3533 /// Emits internal temp array declarations for the directive with inscan
3535 /// The code is the following:
3537 /// size num_iters = <num_iters>;
3538 /// <type> buffer[num_iters];
3540 static void emitScanBasedDirectiveDecls(
3541 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3542 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
) {
3543 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3544 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3545 SmallVector
<const Expr
*, 4> Shareds
;
3546 SmallVector
<const Expr
*, 4> Privates
;
3547 SmallVector
<const Expr
*, 4> ReductionOps
;
3548 SmallVector
<const Expr
*, 4> CopyArrayTemps
;
3549 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3550 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3551 "Only inscan reductions are expected.");
3552 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
3553 Privates
.append(C
->privates().begin(), C
->privates().end());
3554 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
3555 CopyArrayTemps
.append(C
->copy_array_temps().begin(),
3556 C
->copy_array_temps().end());
3559 // Emit buffers for each reduction variables.
3560 // ReductionCodeGen is required to emit correctly the code for array
3562 ReductionCodeGen
RedCG(Shareds
, Shareds
, Privates
, ReductionOps
);
3564 auto *ITA
= CopyArrayTemps
.begin();
3565 for (const Expr
*IRef
: Privates
) {
3566 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IRef
)->getDecl());
3567 // Emit variably modified arrays, used for arrays/array sections
3569 if (PrivateVD
->getType()->isVariablyModifiedType()) {
3570 RedCG
.emitSharedOrigLValue(CGF
, Count
);
3571 RedCG
.emitAggregateType(CGF
, Count
);
3573 CodeGenFunction::OpaqueValueMapping
DimMapping(
3575 cast
<OpaqueValueExpr
>(
3576 cast
<VariableArrayType
>((*ITA
)->getType()->getAsArrayTypeUnsafe())
3578 RValue::get(OMPScanNumIterations
));
3579 // Emit temp buffer.
3580 CGF
.EmitVarDecl(*cast
<VarDecl
>(cast
<DeclRefExpr
>(*ITA
)->getDecl()));
3587 /// Copies final inscan reductions values to the original variables.
3588 /// The code is the following:
3590 /// <orig_var> = buffer[num_iters-1];
3592 static void emitScanBasedDirectiveFinals(
3593 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3594 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
) {
3595 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3596 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3597 SmallVector
<const Expr
*, 4> Shareds
;
3598 SmallVector
<const Expr
*, 4> LHSs
;
3599 SmallVector
<const Expr
*, 4> RHSs
;
3600 SmallVector
<const Expr
*, 4> Privates
;
3601 SmallVector
<const Expr
*, 4> CopyOps
;
3602 SmallVector
<const Expr
*, 4> CopyArrayElems
;
3603 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3604 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3605 "Only inscan reductions are expected.");
3606 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
3607 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
3608 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
3609 Privates
.append(C
->privates().begin(), C
->privates().end());
3610 CopyOps
.append(C
->copy_ops().begin(), C
->copy_ops().end());
3611 CopyArrayElems
.append(C
->copy_array_elems().begin(),
3612 C
->copy_array_elems().end());
3614 // Create temp var and copy LHS value to this temp value.
3615 // LHS = TMP[LastIter];
3616 llvm::Value
*OMPLast
= CGF
.Builder
.CreateNSWSub(
3617 OMPScanNumIterations
,
3618 llvm::ConstantInt::get(CGF
.SizeTy
, 1, /*isSigned=*/false));
3619 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
3620 const Expr
*PrivateExpr
= Privates
[I
];
3621 const Expr
*OrigExpr
= Shareds
[I
];
3622 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
3623 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3625 cast
<OpaqueValueExpr
>(
3626 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3627 RValue::get(OMPLast
));
3628 LValue DestLVal
= CGF
.EmitLValue(OrigExpr
);
3629 LValue SrcLVal
= CGF
.EmitLValue(CopyArrayElem
);
3630 CGF
.EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(CGF
),
3631 SrcLVal
.getAddress(CGF
),
3632 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
3633 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
3638 /// Emits the code for the directive with inscan reductions.
3639 /// The code is the following:
3642 /// for (i: 0..<num_iters>) {
3644 /// buffer[i] = red;
3646 /// #pragma omp master // in parallel region
3647 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3648 /// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3649 /// buffer[i] op= buffer[i-pow(2,k)];
3650 /// #pragma omp barrier // in parallel region
3652 /// for (0..<num_iters>) {
3653 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3657 static void emitScanBasedDirective(
3658 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3659 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
,
3660 llvm::function_ref
<void(CodeGenFunction
&)> FirstGen
,
3661 llvm::function_ref
<void(CodeGenFunction
&)> SecondGen
) {
3662 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3663 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3664 SmallVector
<const Expr
*, 4> Privates
;
3665 SmallVector
<const Expr
*, 4> ReductionOps
;
3666 SmallVector
<const Expr
*, 4> LHSs
;
3667 SmallVector
<const Expr
*, 4> RHSs
;
3668 SmallVector
<const Expr
*, 4> CopyArrayElems
;
3669 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3670 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3671 "Only inscan reductions are expected.");
3672 Privates
.append(C
->privates().begin(), C
->privates().end());
3673 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
3674 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
3675 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
3676 CopyArrayElems
.append(C
->copy_array_elems().begin(),
3677 C
->copy_array_elems().end());
3679 CodeGenFunction::ParentLoopDirectiveForScanRegion
ScanRegion(CGF
, S
);
3681 // Emit loop with input phase:
3683 // for (i: 0..<num_iters>) {
3687 CGF
.OMPFirstScanLoop
= true;
3688 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
3691 // #pragma omp barrier // in parallel region
3692 auto &&CodeGen
= [&S
, OMPScanNumIterations
, &LHSs
, &RHSs
, &CopyArrayElems
,
3694 &Privates
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
3696 // Emit prefix reduction:
3697 // #pragma omp master // in parallel region
3698 // for (int k = 0; k <= ceil(log2(n)); ++k)
3699 llvm::BasicBlock
*InputBB
= CGF
.Builder
.GetInsertBlock();
3700 llvm::BasicBlock
*LoopBB
= CGF
.createBasicBlock("omp.outer.log.scan.body");
3701 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock("omp.outer.log.scan.exit");
3703 CGF
.CGM
.getIntrinsic(llvm::Intrinsic::log2
, CGF
.DoubleTy
);
3705 CGF
.Builder
.CreateUIToFP(OMPScanNumIterations
, CGF
.DoubleTy
);
3706 llvm::Value
*LogVal
= CGF
.EmitNounwindRuntimeCall(F
, Arg
);
3707 F
= CGF
.CGM
.getIntrinsic(llvm::Intrinsic::ceil
, CGF
.DoubleTy
);
3708 LogVal
= CGF
.EmitNounwindRuntimeCall(F
, LogVal
);
3709 LogVal
= CGF
.Builder
.CreateFPToUI(LogVal
, CGF
.IntTy
);
3710 llvm::Value
*NMin1
= CGF
.Builder
.CreateNUWSub(
3711 OMPScanNumIterations
, llvm::ConstantInt::get(CGF
.SizeTy
, 1));
3712 auto DL
= ApplyDebugLocation::CreateDefaultArtificial(CGF
, S
.getBeginLoc());
3713 CGF
.EmitBlock(LoopBB
);
3714 auto *Counter
= CGF
.Builder
.CreatePHI(CGF
.IntTy
, 2);
3716 auto *Pow2K
= CGF
.Builder
.CreatePHI(CGF
.SizeTy
, 2);
3717 Counter
->addIncoming(llvm::ConstantInt::get(CGF
.IntTy
, 0), InputBB
);
3718 Pow2K
->addIncoming(llvm::ConstantInt::get(CGF
.SizeTy
, 1), InputBB
);
3719 // for (size i = n - 1; i >= 2 ^ k; --i)
3720 // tmp[i] op= tmp[i-pow2k];
3721 llvm::BasicBlock
*InnerLoopBB
=
3722 CGF
.createBasicBlock("omp.inner.log.scan.body");
3723 llvm::BasicBlock
*InnerExitBB
=
3724 CGF
.createBasicBlock("omp.inner.log.scan.exit");
3725 llvm::Value
*CmpI
= CGF
.Builder
.CreateICmpUGE(NMin1
, Pow2K
);
3726 CGF
.Builder
.CreateCondBr(CmpI
, InnerLoopBB
, InnerExitBB
);
3727 CGF
.EmitBlock(InnerLoopBB
);
3728 auto *IVal
= CGF
.Builder
.CreatePHI(CGF
.SizeTy
, 2);
3729 IVal
->addIncoming(NMin1
, LoopBB
);
3731 CodeGenFunction::OMPPrivateScope
PrivScope(CGF
);
3732 auto *ILHS
= LHSs
.begin();
3733 auto *IRHS
= RHSs
.begin();
3734 for (const Expr
*CopyArrayElem
: CopyArrayElems
) {
3735 const auto *LHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*ILHS
)->getDecl());
3736 const auto *RHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRHS
)->getDecl());
3737 Address LHSAddr
= Address::invalid();
3739 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3741 cast
<OpaqueValueExpr
>(
3742 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3744 LHSAddr
= CGF
.EmitLValue(CopyArrayElem
).getAddress(CGF
);
3746 PrivScope
.addPrivate(LHSVD
, LHSAddr
);
3747 Address RHSAddr
= Address::invalid();
3749 llvm::Value
*OffsetIVal
= CGF
.Builder
.CreateNUWSub(IVal
, Pow2K
);
3750 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3752 cast
<OpaqueValueExpr
>(
3753 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3754 RValue::get(OffsetIVal
));
3755 RHSAddr
= CGF
.EmitLValue(CopyArrayElem
).getAddress(CGF
);
3757 PrivScope
.addPrivate(RHSVD
, RHSAddr
);
3761 PrivScope
.Privatize();
3762 CGF
.CGM
.getOpenMPRuntime().emitReduction(
3763 CGF
, S
.getEndLoc(), Privates
, LHSs
, RHSs
, ReductionOps
,
3764 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown
});
3766 llvm::Value
*NextIVal
=
3767 CGF
.Builder
.CreateNUWSub(IVal
, llvm::ConstantInt::get(CGF
.SizeTy
, 1));
3768 IVal
->addIncoming(NextIVal
, CGF
.Builder
.GetInsertBlock());
3769 CmpI
= CGF
.Builder
.CreateICmpUGE(NextIVal
, Pow2K
);
3770 CGF
.Builder
.CreateCondBr(CmpI
, InnerLoopBB
, InnerExitBB
);
3771 CGF
.EmitBlock(InnerExitBB
);
3773 CGF
.Builder
.CreateNUWAdd(Counter
, llvm::ConstantInt::get(CGF
.IntTy
, 1));
3774 Counter
->addIncoming(Next
, CGF
.Builder
.GetInsertBlock());
3776 llvm::Value
*NextPow2K
=
3777 CGF
.Builder
.CreateShl(Pow2K
, 1, "", /*HasNUW=*/true);
3778 Pow2K
->addIncoming(NextPow2K
, CGF
.Builder
.GetInsertBlock());
3779 llvm::Value
*Cmp
= CGF
.Builder
.CreateICmpNE(Next
, LogVal
);
3780 CGF
.Builder
.CreateCondBr(Cmp
, LoopBB
, ExitBB
);
3781 auto DL1
= ApplyDebugLocation::CreateDefaultArtificial(CGF
, S
.getEndLoc());
3782 CGF
.EmitBlock(ExitBB
);
3784 if (isOpenMPParallelDirective(S
.getDirectiveKind())) {
3785 CGF
.CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, CodeGen
, S
.getBeginLoc());
3786 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
3787 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
3788 /*ForceSimpleCall=*/true);
3790 RegionCodeGenTy
RCG(CodeGen
);
3794 CGF
.OMPFirstScanLoop
= false;
3798 static bool emitWorksharingDirective(CodeGenFunction
&CGF
,
3799 const OMPLoopDirective
&S
,
3801 bool HasLastprivates
;
3802 if (llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
3803 [](const OMPReductionClause
*C
) {
3804 return C
->getModifier() == OMPC_REDUCTION_inscan
;
3806 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
3807 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
3808 OMPLoopScope
LoopScope(CGF
, S
);
3809 return CGF
.EmitScalarExpr(S
.getNumIterations());
3811 const auto &&FirstGen
= [&S
, HasCancel
](CodeGenFunction
&CGF
) {
3812 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
3813 CGF
, S
.getDirectiveKind(), HasCancel
);
3814 (void)CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3816 emitDispatchForLoopBounds
);
3817 // Emit an implicit barrier at the end.
3818 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(CGF
, S
.getBeginLoc(),
3821 const auto &&SecondGen
= [&S
, HasCancel
,
3822 &HasLastprivates
](CodeGenFunction
&CGF
) {
3823 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
3824 CGF
, S
.getDirectiveKind(), HasCancel
);
3825 HasLastprivates
= CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3827 emitDispatchForLoopBounds
);
3829 if (!isOpenMPParallelDirective(S
.getDirectiveKind()))
3830 emitScanBasedDirectiveDecls(CGF
, S
, NumIteratorsGen
);
3831 emitScanBasedDirective(CGF
, S
, NumIteratorsGen
, FirstGen
, SecondGen
);
3832 if (!isOpenMPParallelDirective(S
.getDirectiveKind()))
3833 emitScanBasedDirectiveFinals(CGF
, S
, NumIteratorsGen
);
3835 CodeGenFunction::OMPCancelStackRAII
CancelRegion(CGF
, S
.getDirectiveKind(),
3837 HasLastprivates
= CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3839 emitDispatchForLoopBounds
);
3841 return HasLastprivates
;
3844 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective
&S
) {
3847 for (OMPClause
*C
: S
.clauses()) {
3848 if (isa
<OMPNowaitClause
>(C
))
3851 if (auto *SC
= dyn_cast
<OMPScheduleClause
>(C
)) {
3852 if (SC
->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown
)
3854 if (SC
->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown
)
3856 switch (SC
->getScheduleKind()) {
3857 case OMPC_SCHEDULE_auto
:
3858 case OMPC_SCHEDULE_dynamic
:
3859 case OMPC_SCHEDULE_runtime
:
3860 case OMPC_SCHEDULE_guided
:
3861 case OMPC_SCHEDULE_static
:
3863 case OMPC_SCHEDULE_unknown
:
3874 static llvm::omp::ScheduleKind
3875 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind
) {
3876 switch (ScheduleClauseKind
) {
3877 case OMPC_SCHEDULE_unknown
:
3878 return llvm::omp::OMP_SCHEDULE_Default
;
3879 case OMPC_SCHEDULE_auto
:
3880 return llvm::omp::OMP_SCHEDULE_Auto
;
3881 case OMPC_SCHEDULE_dynamic
:
3882 return llvm::omp::OMP_SCHEDULE_Dynamic
;
3883 case OMPC_SCHEDULE_guided
:
3884 return llvm::omp::OMP_SCHEDULE_Guided
;
3885 case OMPC_SCHEDULE_runtime
:
3886 return llvm::omp::OMP_SCHEDULE_Runtime
;
3887 case OMPC_SCHEDULE_static
:
3888 return llvm::omp::OMP_SCHEDULE_Static
;
3890 llvm_unreachable("Unhandled schedule kind");
3893 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective
&S
) {
3894 bool HasLastprivates
= false;
3895 bool UseOMPIRBuilder
=
3896 CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
);
3897 auto &&CodeGen
= [this, &S
, &HasLastprivates
,
3898 UseOMPIRBuilder
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3899 // Use the OpenMPIRBuilder if enabled.
3900 if (UseOMPIRBuilder
) {
3901 bool NeedsBarrier
= !S
.getSingleClause
<OMPNowaitClause
>();
3903 llvm::omp::ScheduleKind SchedKind
= llvm::omp::OMP_SCHEDULE_Default
;
3904 llvm::Value
*ChunkSize
= nullptr;
3905 if (auto *SchedClause
= S
.getSingleClause
<OMPScheduleClause
>()) {
3907 convertClauseKindToSchedKind(SchedClause
->getScheduleKind());
3908 if (const Expr
*ChunkSizeExpr
= SchedClause
->getChunkSize())
3909 ChunkSize
= EmitScalarExpr(ChunkSizeExpr
);
3912 // Emit the associated statement and get its loop representation.
3913 const Stmt
*Inner
= S
.getRawStmt();
3914 llvm::CanonicalLoopInfo
*CLI
=
3915 EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
3917 llvm::OpenMPIRBuilder
&OMPBuilder
=
3918 CGM
.getOpenMPRuntime().getOMPBuilder();
3919 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
3920 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
3921 OMPBuilder
.applyWorkshareLoop(
3922 Builder
.getCurrentDebugLocation(), CLI
, AllocaIP
, NeedsBarrier
,
3923 SchedKind
, ChunkSize
, /*HasSimdModifier=*/false,
3924 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
3925 /*HasOrderedClause=*/false);
3929 HasLastprivates
= emitWorksharingDirective(CGF
, S
, S
.hasCancel());
3933 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
3934 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3935 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for
, CodeGen
,
3939 if (!UseOMPIRBuilder
) {
3940 // Emit an implicit barrier at the end.
3941 if (!S
.getSingleClause
<OMPNowaitClause
>() || HasLastprivates
)
3942 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_for
);
3944 // Check for outer lastprivate conditional update.
3945 checkForLastprivateConditionalUpdate(*this, S
);
3948 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective
&S
) {
3949 bool HasLastprivates
= false;
3950 auto &&CodeGen
= [&S
, &HasLastprivates
](CodeGenFunction
&CGF
,
3951 PrePostActionTy
&) {
3952 HasLastprivates
= emitWorksharingDirective(CGF
, S
, /*HasCancel=*/false);
3956 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
3957 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3958 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
3961 // Emit an implicit barrier at the end.
3962 if (!S
.getSingleClause
<OMPNowaitClause
>() || HasLastprivates
)
3963 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_for
);
3964 // Check for outer lastprivate conditional update.
3965 checkForLastprivateConditionalUpdate(*this, S
);
3968 static LValue
createSectionLVal(CodeGenFunction
&CGF
, QualType Ty
,
3970 llvm::Value
*Init
= nullptr) {
3971 LValue LVal
= CGF
.MakeAddrLValue(CGF
.CreateMemTemp(Ty
, Name
), Ty
);
3973 CGF
.EmitStoreThroughLValue(RValue::get(Init
), LVal
, /*isInit*/ true);
3977 void CodeGenFunction::EmitSections(const OMPExecutableDirective
&S
) {
3978 const Stmt
*CapturedStmt
= S
.getInnermostCapturedStmt()->getCapturedStmt();
3979 const auto *CS
= dyn_cast
<CompoundStmt
>(CapturedStmt
);
3980 bool HasLastprivates
= false;
3981 auto &&CodeGen
= [&S
, CapturedStmt
, CS
,
3982 &HasLastprivates
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3983 const ASTContext
&C
= CGF
.getContext();
3984 QualType KmpInt32Ty
=
3985 C
.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3986 // Emit helper vars inits.
3987 LValue LB
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.lb.",
3988 CGF
.Builder
.getInt32(0));
3989 llvm::ConstantInt
*GlobalUBVal
= CS
!= nullptr
3990 ? CGF
.Builder
.getInt32(CS
->size() - 1)
3991 : CGF
.Builder
.getInt32(0);
3993 createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.ub.", GlobalUBVal
);
3994 LValue ST
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.st.",
3995 CGF
.Builder
.getInt32(1));
3996 LValue IL
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.il.",
3997 CGF
.Builder
.getInt32(0));
3999 LValue IV
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.iv.");
4000 OpaqueValueExpr
IVRefExpr(S
.getBeginLoc(), KmpInt32Ty
, VK_LValue
);
4001 CodeGenFunction::OpaqueValueMapping
OpaqueIV(CGF
, &IVRefExpr
, IV
);
4002 OpaqueValueExpr
UBRefExpr(S
.getBeginLoc(), KmpInt32Ty
, VK_LValue
);
4003 CodeGenFunction::OpaqueValueMapping
OpaqueUB(CGF
, &UBRefExpr
, UB
);
4004 // Generate condition for loop.
4005 BinaryOperator
*Cond
= BinaryOperator::Create(
4006 C
, &IVRefExpr
, &UBRefExpr
, BO_LE
, C
.BoolTy
, VK_PRValue
, OK_Ordinary
,
4007 S
.getBeginLoc(), FPOptionsOverride());
4008 // Increment for loop counter.
4009 UnaryOperator
*Inc
= UnaryOperator::Create(
4010 C
, &IVRefExpr
, UO_PreInc
, KmpInt32Ty
, VK_PRValue
, OK_Ordinary
,
4011 S
.getBeginLoc(), true, FPOptionsOverride());
4012 auto &&BodyGen
= [CapturedStmt
, CS
, &S
, &IV
](CodeGenFunction
&CGF
) {
4013 // Iterate through all sections and emit a switch construct:
4016 // <SectionStmt[0]>;
4019 // case <NumSection> - 1:
4020 // <SectionStmt[<NumSection> - 1]>;
4023 // .omp.sections.exit:
4024 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock(".omp.sections.exit");
4025 llvm::SwitchInst
*SwitchStmt
=
4026 CGF
.Builder
.CreateSwitch(CGF
.EmitLoadOfScalar(IV
, S
.getBeginLoc()),
4027 ExitBB
, CS
== nullptr ? 1 : CS
->size());
4029 unsigned CaseNumber
= 0;
4030 for (const Stmt
*SubStmt
: CS
->children()) {
4031 auto CaseBB
= CGF
.createBasicBlock(".omp.sections.case");
4032 CGF
.EmitBlock(CaseBB
);
4033 SwitchStmt
->addCase(CGF
.Builder
.getInt32(CaseNumber
), CaseBB
);
4034 CGF
.EmitStmt(SubStmt
);
4035 CGF
.EmitBranch(ExitBB
);
4039 llvm::BasicBlock
*CaseBB
= CGF
.createBasicBlock(".omp.sections.case");
4040 CGF
.EmitBlock(CaseBB
);
4041 SwitchStmt
->addCase(CGF
.Builder
.getInt32(0), CaseBB
);
4042 CGF
.EmitStmt(CapturedStmt
);
4043 CGF
.EmitBranch(ExitBB
);
4045 CGF
.EmitBlock(ExitBB
, /*IsFinished=*/true);
4048 CodeGenFunction::OMPPrivateScope
LoopScope(CGF
);
4049 if (CGF
.EmitOMPFirstprivateClause(S
, LoopScope
)) {
4050 // Emit implicit barrier to synchronize threads and avoid data races on
4051 // initialization of firstprivate variables and post-update of lastprivate
4053 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
4054 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
4055 /*ForceSimpleCall=*/true);
4057 CGF
.EmitOMPPrivateClause(S
, LoopScope
);
4058 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(CGF
, S
, IV
);
4059 HasLastprivates
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
4060 CGF
.EmitOMPReductionClauseInit(S
, LoopScope
);
4061 (void)LoopScope
.Privatize();
4062 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
4063 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
4065 // Emit static non-chunked loop.
4066 OpenMPScheduleTy ScheduleKind
;
4067 ScheduleKind
.Schedule
= OMPC_SCHEDULE_static
;
4068 CGOpenMPRuntime::StaticRTInput
StaticInit(
4069 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL
.getAddress(CGF
),
4070 LB
.getAddress(CGF
), UB
.getAddress(CGF
), ST
.getAddress(CGF
));
4071 CGF
.CGM
.getOpenMPRuntime().emitForStaticInit(
4072 CGF
, S
.getBeginLoc(), S
.getDirectiveKind(), ScheduleKind
, StaticInit
);
4073 // UB = min(UB, GlobalUB);
4074 llvm::Value
*UBVal
= CGF
.EmitLoadOfScalar(UB
, S
.getBeginLoc());
4075 llvm::Value
*MinUBGlobalUB
= CGF
.Builder
.CreateSelect(
4076 CGF
.Builder
.CreateICmpSLT(UBVal
, GlobalUBVal
), UBVal
, GlobalUBVal
);
4077 CGF
.EmitStoreOfScalar(MinUBGlobalUB
, UB
);
4079 CGF
.EmitStoreOfScalar(CGF
.EmitLoadOfScalar(LB
, S
.getBeginLoc()), IV
);
4080 // while (idx <= UB) { BODY; ++idx; }
4081 CGF
.EmitOMPInnerLoop(S
, /*RequiresCleanup=*/false, Cond
, Inc
, BodyGen
,
4082 [](CodeGenFunction
&) {});
4083 // Tell the runtime we are done.
4084 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
) {
4085 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
4086 S
.getDirectiveKind());
4088 CGF
.OMPCancelStack
.emitExit(CGF
, S
.getDirectiveKind(), CodeGen
);
4089 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4090 // Emit post-update of the reduction variables if IsLastIter != 0.
4091 emitPostUpdateForReductionClause(CGF
, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
4092 return CGF
.Builder
.CreateIsNotNull(
4093 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
4096 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4097 if (HasLastprivates
)
4098 CGF
.EmitOMPLastprivateClauseFinal(
4099 S
, /*NoFinals=*/false,
4100 CGF
.Builder
.CreateIsNotNull(
4101 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc())));
4104 bool HasCancel
= false;
4105 if (auto *OSD
= dyn_cast
<OMPSectionsDirective
>(&S
))
4106 HasCancel
= OSD
->hasCancel();
4107 else if (auto *OPSD
= dyn_cast
<OMPParallelSectionsDirective
>(&S
))
4108 HasCancel
= OPSD
->hasCancel();
4109 OMPCancelStackRAII
CancelRegion(*this, S
.getDirectiveKind(), HasCancel
);
4110 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections
, CodeGen
,
4112 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4113 // clause. Otherwise the barrier will be generated by the codegen for the
4115 if (HasLastprivates
&& S
.getSingleClause
<OMPNowaitClause
>()) {
4116 // Emit implicit barrier to synchronize threads and avoid data races on
4117 // initialization of firstprivate variables.
4118 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(),
4123 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective
&S
) {
4124 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4125 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4126 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4127 using BodyGenCallbackTy
= llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy
;
4129 auto FiniCB
= [this](InsertPointTy IP
) {
4130 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4133 const CapturedStmt
*ICS
= S
.getInnermostCapturedStmt();
4134 const Stmt
*CapturedStmt
= S
.getInnermostCapturedStmt()->getCapturedStmt();
4135 const auto *CS
= dyn_cast
<CompoundStmt
>(CapturedStmt
);
4136 llvm::SmallVector
<BodyGenCallbackTy
, 4> SectionCBVector
;
4138 for (const Stmt
*SubStmt
: CS
->children()) {
4139 auto SectionCB
= [this, SubStmt
](InsertPointTy AllocaIP
,
4140 InsertPointTy CodeGenIP
) {
4141 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4142 *this, SubStmt
, AllocaIP
, CodeGenIP
, "section");
4144 SectionCBVector
.push_back(SectionCB
);
4147 auto SectionCB
= [this, CapturedStmt
](InsertPointTy AllocaIP
,
4148 InsertPointTy CodeGenIP
) {
4149 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4150 *this, CapturedStmt
, AllocaIP
, CodeGenIP
, "section");
4152 SectionCBVector
.push_back(SectionCB
);
4155 // Privatization callback that performs appropriate action for
4156 // shared/private/firstprivate/lastprivate/copyin/... variables.
4158 // TODO: This defaults to shared right now.
4159 auto PrivCB
= [](InsertPointTy AllocaIP
, InsertPointTy CodeGenIP
,
4160 llvm::Value
&, llvm::Value
&Val
, llvm::Value
*&ReplVal
) {
4161 // The next line is appropriate only for variables (Val) with the
4162 // data-sharing attribute "shared".
4168 CGCapturedStmtInfo
CGSI(*ICS
, CR_OpenMP
);
4169 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(*this, &CGSI
);
4170 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
4171 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
4172 Builder
.restoreIP(OMPBuilder
.createSections(
4173 Builder
, AllocaIP
, SectionCBVector
, PrivCB
, FiniCB
, S
.hasCancel(),
4174 S
.getSingleClause
<OMPNowaitClause
>()));
4179 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4180 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4183 // Emit an implicit barrier at the end.
4184 if (!S
.getSingleClause
<OMPNowaitClause
>()) {
4185 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(),
4188 // Check for outer lastprivate conditional update.
4189 checkForLastprivateConditionalUpdate(*this, S
);
4192 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective
&S
) {
4193 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4194 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4195 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4197 const Stmt
*SectionRegionBodyStmt
= S
.getAssociatedStmt();
4198 auto FiniCB
= [this](InsertPointTy IP
) {
4199 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4202 auto BodyGenCB
= [SectionRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4203 InsertPointTy CodeGenIP
) {
4204 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4205 *this, SectionRegionBodyStmt
, AllocaIP
, CodeGenIP
, "section");
4208 LexicalScope
Scope(*this, S
.getSourceRange());
4210 Builder
.restoreIP(OMPBuilder
.createSection(Builder
, BodyGenCB
, FiniCB
));
4214 LexicalScope
Scope(*this, S
.getSourceRange());
4216 EmitStmt(S
.getAssociatedStmt());
4219 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective
&S
) {
4220 llvm::SmallVector
<const Expr
*, 8> CopyprivateVars
;
4221 llvm::SmallVector
<const Expr
*, 8> DestExprs
;
4222 llvm::SmallVector
<const Expr
*, 8> SrcExprs
;
4223 llvm::SmallVector
<const Expr
*, 8> AssignmentOps
;
4224 // Check if there are any 'copyprivate' clauses associated with this
4225 // 'single' construct.
4226 // Build a list of copyprivate variables along with helper expressions
4227 // (<source>, <destination>, <destination>=<source> expressions)
4228 for (const auto *C
: S
.getClausesOfKind
<OMPCopyprivateClause
>()) {
4229 CopyprivateVars
.append(C
->varlists().begin(), C
->varlists().end());
4230 DestExprs
.append(C
->destination_exprs().begin(),
4231 C
->destination_exprs().end());
4232 SrcExprs
.append(C
->source_exprs().begin(), C
->source_exprs().end());
4233 AssignmentOps
.append(C
->assignment_ops().begin(),
4234 C
->assignment_ops().end());
4236 // Emit code for 'single' region along with 'copyprivate' clauses
4237 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4239 OMPPrivateScope
SingleScope(CGF
);
4240 (void)CGF
.EmitOMPFirstprivateClause(S
, SingleScope
);
4241 CGF
.EmitOMPPrivateClause(S
, SingleScope
);
4242 (void)SingleScope
.Privatize();
4243 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
4247 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4248 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4249 CGM
.getOpenMPRuntime().emitSingleRegion(*this, CodeGen
, S
.getBeginLoc(),
4250 CopyprivateVars
, DestExprs
,
4251 SrcExprs
, AssignmentOps
);
4253 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4254 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4255 if (!S
.getSingleClause
<OMPNowaitClause
>() && CopyprivateVars
.empty()) {
4256 CGM
.getOpenMPRuntime().emitBarrierCall(
4257 *this, S
.getBeginLoc(),
4258 S
.getSingleClause
<OMPNowaitClause
>() ? OMPD_unknown
: OMPD_single
);
4260 // Check for outer lastprivate conditional update.
4261 checkForLastprivateConditionalUpdate(*this, S
);
4264 static void emitMaster(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
4265 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4267 CGF
.EmitStmt(S
.getRawStmt());
4269 CGF
.CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, CodeGen
, S
.getBeginLoc());
4272 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective
&S
) {
4273 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4274 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4275 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4277 const Stmt
*MasterRegionBodyStmt
= S
.getAssociatedStmt();
4279 auto FiniCB
= [this](InsertPointTy IP
) {
4280 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4283 auto BodyGenCB
= [MasterRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4284 InsertPointTy CodeGenIP
) {
4285 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4286 *this, MasterRegionBodyStmt
, AllocaIP
, CodeGenIP
, "master");
4289 LexicalScope
Scope(*this, S
.getSourceRange());
4291 Builder
.restoreIP(OMPBuilder
.createMaster(Builder
, BodyGenCB
, FiniCB
));
4295 LexicalScope
Scope(*this, S
.getSourceRange());
4297 emitMaster(*this, S
);
4300 static void emitMasked(CodeGenFunction
&CGF
, const OMPExecutableDirective
&S
) {
4301 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4303 CGF
.EmitStmt(S
.getRawStmt());
4305 Expr
*Filter
= nullptr;
4306 if (const auto *FilterClause
= S
.getSingleClause
<OMPFilterClause
>())
4307 Filter
= FilterClause
->getThreadID();
4308 CGF
.CGM
.getOpenMPRuntime().emitMaskedRegion(CGF
, CodeGen
, S
.getBeginLoc(),
4312 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective
&S
) {
4313 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4314 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4315 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4317 const Stmt
*MaskedRegionBodyStmt
= S
.getAssociatedStmt();
4318 const Expr
*Filter
= nullptr;
4319 if (const auto *FilterClause
= S
.getSingleClause
<OMPFilterClause
>())
4320 Filter
= FilterClause
->getThreadID();
4321 llvm::Value
*FilterVal
= Filter
4322 ? EmitScalarExpr(Filter
, CGM
.Int32Ty
)
4323 : llvm::ConstantInt::get(CGM
.Int32Ty
, /*V=*/0);
4325 auto FiniCB
= [this](InsertPointTy IP
) {
4326 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4329 auto BodyGenCB
= [MaskedRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4330 InsertPointTy CodeGenIP
) {
4331 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4332 *this, MaskedRegionBodyStmt
, AllocaIP
, CodeGenIP
, "masked");
4335 LexicalScope
Scope(*this, S
.getSourceRange());
4338 OMPBuilder
.createMasked(Builder
, BodyGenCB
, FiniCB
, FilterVal
));
4342 LexicalScope
Scope(*this, S
.getSourceRange());
4344 emitMasked(*this, S
);
4347 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective
&S
) {
4348 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4349 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4350 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4352 const Stmt
*CriticalRegionBodyStmt
= S
.getAssociatedStmt();
4353 const Expr
*Hint
= nullptr;
4354 if (const auto *HintClause
= S
.getSingleClause
<OMPHintClause
>())
4355 Hint
= HintClause
->getHint();
4357 // TODO: This is slightly different from what's currently being done in
4358 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4359 // about typing is final.
4360 llvm::Value
*HintInst
= nullptr;
4363 Builder
.CreateIntCast(EmitScalarExpr(Hint
), CGM
.Int32Ty
, false);
4365 auto FiniCB
= [this](InsertPointTy IP
) {
4366 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4369 auto BodyGenCB
= [CriticalRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4370 InsertPointTy CodeGenIP
) {
4371 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4372 *this, CriticalRegionBodyStmt
, AllocaIP
, CodeGenIP
, "critical");
4375 LexicalScope
Scope(*this, S
.getSourceRange());
4377 Builder
.restoreIP(OMPBuilder
.createCritical(
4378 Builder
, BodyGenCB
, FiniCB
, S
.getDirectiveName().getAsString(),
4384 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4386 CGF
.EmitStmt(S
.getAssociatedStmt());
4388 const Expr
*Hint
= nullptr;
4389 if (const auto *HintClause
= S
.getSingleClause
<OMPHintClause
>())
4390 Hint
= HintClause
->getHint();
4391 LexicalScope
Scope(*this, S
.getSourceRange());
4393 CGM
.getOpenMPRuntime().emitCriticalRegion(*this,
4394 S
.getDirectiveName().getAsString(),
4395 CodeGen
, S
.getBeginLoc(), Hint
);
4398 void CodeGenFunction::EmitOMPParallelForDirective(
4399 const OMPParallelForDirective
&S
) {
4400 // Emit directive as a combined directive that consists of two implicit
4401 // directives: 'parallel' with 'for' directive.
4402 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4404 emitOMPCopyinClause(CGF
, S
);
4405 (void)emitWorksharingDirective(CGF
, S
, S
.hasCancel());
4408 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
4409 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
4410 CGCapturedStmtInfo
CGSI(CR_OpenMP
);
4411 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGSI
);
4412 OMPLoopScope
LoopScope(CGF
, S
);
4413 return CGF
.EmitScalarExpr(S
.getNumIterations());
4415 bool IsInscan
= llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
4416 [](const OMPReductionClause
*C
) {
4417 return C
->getModifier() == OMPC_REDUCTION_inscan
;
4420 emitScanBasedDirectiveDecls(*this, S
, NumIteratorsGen
);
4422 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4423 emitCommonOMPParallelDirective(*this, S
, OMPD_for
, CodeGen
,
4424 emitEmptyBoundParameters
);
4426 emitScanBasedDirectiveFinals(*this, S
, NumIteratorsGen
);
4428 // Check for outer lastprivate conditional update.
4429 checkForLastprivateConditionalUpdate(*this, S
);
4432 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4433 const OMPParallelForSimdDirective
&S
) {
4434 // Emit directive as a combined directive that consists of two implicit
4435 // directives: 'parallel' with 'for' directive.
4436 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4438 emitOMPCopyinClause(CGF
, S
);
4439 (void)emitWorksharingDirective(CGF
, S
, /*HasCancel=*/false);
4442 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
4443 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
4444 CGCapturedStmtInfo
CGSI(CR_OpenMP
);
4445 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGSI
);
4446 OMPLoopScope
LoopScope(CGF
, S
);
4447 return CGF
.EmitScalarExpr(S
.getNumIterations());
4449 bool IsInscan
= llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
4450 [](const OMPReductionClause
*C
) {
4451 return C
->getModifier() == OMPC_REDUCTION_inscan
;
4454 emitScanBasedDirectiveDecls(*this, S
, NumIteratorsGen
);
4456 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4457 emitCommonOMPParallelDirective(*this, S
, OMPD_for_simd
, CodeGen
,
4458 emitEmptyBoundParameters
);
4460 emitScanBasedDirectiveFinals(*this, S
, NumIteratorsGen
);
4462 // Check for outer lastprivate conditional update.
4463 checkForLastprivateConditionalUpdate(*this, S
);
4466 void CodeGenFunction::EmitOMPParallelMasterDirective(
4467 const OMPParallelMasterDirective
&S
) {
4468 // Emit directive as a combined directive that consists of two implicit
4469 // directives: 'parallel' with 'master' directive.
4470 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4472 OMPPrivateScope
PrivateScope(CGF
);
4473 emitOMPCopyinClause(CGF
, S
);
4474 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
4475 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
4476 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
4477 (void)PrivateScope
.Privatize();
4479 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4483 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4484 emitCommonOMPParallelDirective(*this, S
, OMPD_master
, CodeGen
,
4485 emitEmptyBoundParameters
);
4486 emitPostUpdateForReductionClause(*this, S
,
4487 [](CodeGenFunction
&) { return nullptr; });
4489 // Check for outer lastprivate conditional update.
4490 checkForLastprivateConditionalUpdate(*this, S
);
4493 void CodeGenFunction::EmitOMPParallelMaskedDirective(
4494 const OMPParallelMaskedDirective
&S
) {
4495 // Emit directive as a combined directive that consists of two implicit
4496 // directives: 'parallel' with 'masked' directive.
4497 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4499 OMPPrivateScope
PrivateScope(CGF
);
4500 emitOMPCopyinClause(CGF
, S
);
4501 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
4502 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
4503 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
4504 (void)PrivateScope
.Privatize();
4506 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4510 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4511 emitCommonOMPParallelDirective(*this, S
, OMPD_masked
, CodeGen
,
4512 emitEmptyBoundParameters
);
4513 emitPostUpdateForReductionClause(*this, S
,
4514 [](CodeGenFunction
&) { return nullptr; });
4516 // Check for outer lastprivate conditional update.
4517 checkForLastprivateConditionalUpdate(*this, S
);
4520 void CodeGenFunction::EmitOMPParallelSectionsDirective(
4521 const OMPParallelSectionsDirective
&S
) {
4522 // Emit directive as a combined directive that consists of two implicit
4523 // directives: 'parallel' with 'sections' directive.
4524 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4526 emitOMPCopyinClause(CGF
, S
);
4527 CGF
.EmitSections(S
);
4531 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4532 emitCommonOMPParallelDirective(*this, S
, OMPD_sections
, CodeGen
,
4533 emitEmptyBoundParameters
);
4535 // Check for outer lastprivate conditional update.
4536 checkForLastprivateConditionalUpdate(*this, S
);
4540 /// Get the list of variables declared in the context of the untied tasks.
4541 class CheckVarsEscapingUntiedTaskDeclContext final
4542 : public ConstStmtVisitor
<CheckVarsEscapingUntiedTaskDeclContext
> {
4543 llvm::SmallVector
<const VarDecl
*, 4> PrivateDecls
;
4546 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4547 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4548 void VisitDeclStmt(const DeclStmt
*S
) {
4551 // Need to privatize only local vars, static locals can be processed as is.
4552 for (const Decl
*D
: S
->decls()) {
4553 if (const auto *VD
= dyn_cast_or_null
<VarDecl
>(D
))
4554 if (VD
->hasLocalStorage())
4555 PrivateDecls
.push_back(VD
);
4558 void VisitOMPExecutableDirective(const OMPExecutableDirective
*) {}
4559 void VisitCapturedStmt(const CapturedStmt
*) {}
4560 void VisitLambdaExpr(const LambdaExpr
*) {}
4561 void VisitBlockExpr(const BlockExpr
*) {}
4562 void VisitStmt(const Stmt
*S
) {
4565 for (const Stmt
*Child
: S
->children())
4570 /// Swaps list of vars with the provided one.
4571 ArrayRef
<const VarDecl
*> getPrivateDecls() const { return PrivateDecls
; }
4573 } // anonymous namespace
4575 static void buildDependences(const OMPExecutableDirective
&S
,
4576 OMPTaskDataTy
&Data
) {
4578 // First look for 'omp_all_memory' and add this first.
4579 bool OmpAllMemory
= false;
4581 S
.getClausesOfKind
<OMPDependClause
>(), [](const OMPDependClause
*C
) {
4582 return C
->getDependencyKind() == OMPC_DEPEND_outallmemory
||
4583 C
->getDependencyKind() == OMPC_DEPEND_inoutallmemory
;
4585 OmpAllMemory
= true;
4586 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4587 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4589 OMPTaskDataTy::DependData
&DD
=
4590 Data
.Dependences
.emplace_back(OMPC_DEPEND_outallmemory
,
4591 /*IteratorExpr=*/nullptr);
4592 // Add a nullptr Expr to simplify the codegen in emitDependData.
4593 DD
.DepExprs
.push_back(nullptr);
4595 // Add remaining dependences skipping any 'out' or 'inout' if they are
4596 // overridden by 'omp_all_memory'.
4597 for (const auto *C
: S
.getClausesOfKind
<OMPDependClause
>()) {
4598 OpenMPDependClauseKind Kind
= C
->getDependencyKind();
4599 if (Kind
== OMPC_DEPEND_outallmemory
|| Kind
== OMPC_DEPEND_inoutallmemory
)
4601 if (OmpAllMemory
&& (Kind
== OMPC_DEPEND_out
|| Kind
== OMPC_DEPEND_inout
))
4603 OMPTaskDataTy::DependData
&DD
=
4604 Data
.Dependences
.emplace_back(C
->getDependencyKind(), C
->getModifier());
4605 DD
.DepExprs
.append(C
->varlist_begin(), C
->varlist_end());
4609 void CodeGenFunction::EmitOMPTaskBasedDirective(
4610 const OMPExecutableDirective
&S
, const OpenMPDirectiveKind CapturedRegion
,
4611 const RegionCodeGenTy
&BodyGen
, const TaskGenTy
&TaskGen
,
4612 OMPTaskDataTy
&Data
) {
4613 // Emit outlined function for task construct.
4614 const CapturedStmt
*CS
= S
.getCapturedStmt(CapturedRegion
);
4615 auto I
= CS
->getCapturedDecl()->param_begin();
4616 auto PartId
= std::next(I
);
4617 auto TaskT
= std::next(I
, 4);
4618 // Check if the task is final
4619 if (const auto *Clause
= S
.getSingleClause
<OMPFinalClause
>()) {
4620 // If the condition constant folds and can be elided, try to avoid emitting
4621 // the condition and the dead arm of the if/else.
4622 const Expr
*Cond
= Clause
->getCondition();
4624 if (ConstantFoldsToSimpleInteger(Cond
, CondConstant
))
4625 Data
.Final
.setInt(CondConstant
);
4627 Data
.Final
.setPointer(EvaluateExprAsBool(Cond
));
4629 // By default the task is not final.
4630 Data
.Final
.setInt(/*IntVal=*/false);
4632 // Check if the task has 'priority' clause.
4633 if (const auto *Clause
= S
.getSingleClause
<OMPPriorityClause
>()) {
4634 const Expr
*Prio
= Clause
->getPriority();
4635 Data
.Priority
.setInt(/*IntVal=*/true);
4636 Data
.Priority
.setPointer(EmitScalarConversion(
4637 EmitScalarExpr(Prio
), Prio
->getType(),
4638 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4639 Prio
->getExprLoc()));
4641 // The first function argument for tasks is a thread id, the second one is a
4642 // part id (0 for tied tasks, >=0 for untied task).
4643 llvm::DenseSet
<const VarDecl
*> EmittedAsPrivate
;
4644 // Get list of private variables.
4645 for (const auto *C
: S
.getClausesOfKind
<OMPPrivateClause
>()) {
4646 auto IRef
= C
->varlist_begin();
4647 for (const Expr
*IInit
: C
->private_copies()) {
4648 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4649 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4650 Data
.PrivateVars
.push_back(*IRef
);
4651 Data
.PrivateCopies
.push_back(IInit
);
4656 EmittedAsPrivate
.clear();
4657 // Get list of firstprivate variables.
4658 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
4659 auto IRef
= C
->varlist_begin();
4660 auto IElemInitRef
= C
->inits().begin();
4661 for (const Expr
*IInit
: C
->private_copies()) {
4662 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4663 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4664 Data
.FirstprivateVars
.push_back(*IRef
);
4665 Data
.FirstprivateCopies
.push_back(IInit
);
4666 Data
.FirstprivateInits
.push_back(*IElemInitRef
);
4672 // Get list of lastprivate variables (for taskloops).
4673 llvm::MapVector
<const VarDecl
*, const DeclRefExpr
*> LastprivateDstsOrigs
;
4674 for (const auto *C
: S
.getClausesOfKind
<OMPLastprivateClause
>()) {
4675 auto IRef
= C
->varlist_begin();
4676 auto ID
= C
->destination_exprs().begin();
4677 for (const Expr
*IInit
: C
->private_copies()) {
4678 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4679 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4680 Data
.LastprivateVars
.push_back(*IRef
);
4681 Data
.LastprivateCopies
.push_back(IInit
);
4683 LastprivateDstsOrigs
.insert(
4684 std::make_pair(cast
<VarDecl
>(cast
<DeclRefExpr
>(*ID
)->getDecl()),
4685 cast
<DeclRefExpr
>(*IRef
)));
4690 SmallVector
<const Expr
*, 4> LHSs
;
4691 SmallVector
<const Expr
*, 4> RHSs
;
4692 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
4693 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
4694 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
4695 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
4696 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
4697 C
->reduction_ops().end());
4698 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
4699 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
4701 Data
.Reductions
= CGM
.getOpenMPRuntime().emitTaskReductionInit(
4702 *this, S
.getBeginLoc(), LHSs
, RHSs
, Data
);
4703 // Build list of dependences.
4704 buildDependences(S
, Data
);
4705 // Get list of local vars for untied tasks.
4707 CheckVarsEscapingUntiedTaskDeclContext Checker
;
4708 Checker
.Visit(S
.getInnermostCapturedStmt()->getCapturedStmt());
4709 Data
.PrivateLocals
.append(Checker
.getPrivateDecls().begin(),
4710 Checker
.getPrivateDecls().end());
4712 auto &&CodeGen
= [&Data
, &S
, CS
, &BodyGen
, &LastprivateDstsOrigs
,
4713 CapturedRegion
](CodeGenFunction
&CGF
,
4714 PrePostActionTy
&Action
) {
4715 llvm::MapVector
<CanonicalDeclPtr
<const VarDecl
>,
4716 std::pair
<Address
, Address
>>
4718 // Set proper addresses for generated private copies.
4719 OMPPrivateScope
Scope(CGF
);
4720 // Generate debug info for variables present in shared clause.
4721 if (auto *DI
= CGF
.getDebugInfo()) {
4722 llvm::SmallDenseMap
<const VarDecl
*, FieldDecl
*> CaptureFields
=
4723 CGF
.CapturedStmtInfo
->getCaptureFields();
4724 llvm::Value
*ContextValue
= CGF
.CapturedStmtInfo
->getContextValue();
4725 if (CaptureFields
.size() && ContextValue
) {
4726 unsigned CharWidth
= CGF
.getContext().getCharWidth();
4727 // The shared variables are packed together as members of structure.
4728 // So the address of each shared variable can be computed by adding
4729 // offset of it (within record) to the base address of record. For each
4730 // shared variable, debug intrinsic llvm.dbg.declare is generated with
4731 // appropriate expressions (DIExpression).
4733 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4734 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4736 // metadata !DIExpression(DW_OP_deref))
4737 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4739 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4740 for (auto It
= CaptureFields
.begin(); It
!= CaptureFields
.end(); ++It
) {
4741 const VarDecl
*SharedVar
= It
->first
;
4742 RecordDecl
*CaptureRecord
= It
->second
->getParent();
4743 const ASTRecordLayout
&Layout
=
4744 CGF
.getContext().getASTRecordLayout(CaptureRecord
);
4746 Layout
.getFieldOffset(It
->second
->getFieldIndex()) / CharWidth
;
4747 if (CGF
.CGM
.getCodeGenOpts().hasReducedDebugInfo())
4748 (void)DI
->EmitDeclareOfAutoVariable(SharedVar
, ContextValue
,
4749 CGF
.Builder
, false);
4750 llvm::Instruction
&Last
= CGF
.Builder
.GetInsertBlock()->back();
4751 // Get the call dbg.declare instruction we just created and update
4752 // its DIExpression to add offset to base address.
4753 if (auto DDI
= dyn_cast
<llvm::DbgVariableIntrinsic
>(&Last
)) {
4754 SmallVector
<uint64_t, 8> Ops
;
4755 // Add offset to the base address if non zero.
4757 Ops
.push_back(llvm::dwarf::DW_OP_plus_uconst
);
4758 Ops
.push_back(Offset
);
4760 Ops
.push_back(llvm::dwarf::DW_OP_deref
);
4761 auto &Ctx
= DDI
->getContext();
4762 llvm::DIExpression
*DIExpr
= llvm::DIExpression::get(Ctx
, Ops
);
4763 Last
.setOperand(2, llvm::MetadataAsValue::get(Ctx
, DIExpr
));
4768 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> FirstprivatePtrs
;
4769 if (!Data
.PrivateVars
.empty() || !Data
.FirstprivateVars
.empty() ||
4770 !Data
.LastprivateVars
.empty() || !Data
.PrivateLocals
.empty()) {
4771 enum { PrivatesParam
= 2, CopyFnParam
= 3 };
4772 llvm::Value
*CopyFn
= CGF
.Builder
.CreateLoad(
4773 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(CopyFnParam
)));
4774 llvm::Value
*PrivatesPtr
= CGF
.Builder
.CreateLoad(CGF
.GetAddrOfLocalVar(
4775 CS
->getCapturedDecl()->getParam(PrivatesParam
)));
4777 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> PrivatePtrs
;
4778 llvm::SmallVector
<llvm::Value
*, 16> CallArgs
;
4779 llvm::SmallVector
<llvm::Type
*, 4> ParamTypes
;
4780 CallArgs
.push_back(PrivatesPtr
);
4781 ParamTypes
.push_back(PrivatesPtr
->getType());
4782 for (const Expr
*E
: Data
.PrivateVars
) {
4783 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4784 Address PrivatePtr
= CGF
.CreateMemTemp(
4785 CGF
.getContext().getPointerType(E
->getType()), ".priv.ptr.addr");
4786 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4787 CallArgs
.push_back(PrivatePtr
.getPointer());
4788 ParamTypes
.push_back(PrivatePtr
.getType());
4790 for (const Expr
*E
: Data
.FirstprivateVars
) {
4791 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4792 Address PrivatePtr
=
4793 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
4794 ".firstpriv.ptr.addr");
4795 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4796 FirstprivatePtrs
.emplace_back(VD
, PrivatePtr
);
4797 CallArgs
.push_back(PrivatePtr
.getPointer());
4798 ParamTypes
.push_back(PrivatePtr
.getType());
4800 for (const Expr
*E
: Data
.LastprivateVars
) {
4801 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4802 Address PrivatePtr
=
4803 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
4804 ".lastpriv.ptr.addr");
4805 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4806 CallArgs
.push_back(PrivatePtr
.getPointer());
4807 ParamTypes
.push_back(PrivatePtr
.getType());
4809 for (const VarDecl
*VD
: Data
.PrivateLocals
) {
4810 QualType Ty
= VD
->getType().getNonReferenceType();
4811 if (VD
->getType()->isLValueReferenceType())
4812 Ty
= CGF
.getContext().getPointerType(Ty
);
4813 if (isAllocatableDecl(VD
))
4814 Ty
= CGF
.getContext().getPointerType(Ty
);
4815 Address PrivatePtr
= CGF
.CreateMemTemp(
4816 CGF
.getContext().getPointerType(Ty
), ".local.ptr.addr");
4817 auto Result
= UntiedLocalVars
.insert(
4818 std::make_pair(VD
, std::make_pair(PrivatePtr
, Address::invalid())));
4819 // If key exists update in place.
4820 if (Result
.second
== false)
4821 *Result
.first
= std::make_pair(
4822 VD
, std::make_pair(PrivatePtr
, Address::invalid()));
4823 CallArgs
.push_back(PrivatePtr
.getPointer());
4824 ParamTypes
.push_back(PrivatePtr
.getType());
4826 auto *CopyFnTy
= llvm::FunctionType::get(CGF
.Builder
.getVoidTy(),
4827 ParamTypes
, /*isVarArg=*/false);
4828 CopyFn
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
4829 CopyFn
, CopyFnTy
->getPointerTo());
4830 CGF
.CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(
4831 CGF
, S
.getBeginLoc(), {CopyFnTy
, CopyFn
}, CallArgs
);
4832 for (const auto &Pair
: LastprivateDstsOrigs
) {
4833 const auto *OrigVD
= cast
<VarDecl
>(Pair
.second
->getDecl());
4834 DeclRefExpr
DRE(CGF
.getContext(), const_cast<VarDecl
*>(OrigVD
),
4835 /*RefersToEnclosingVariableOrCapture=*/
4836 CGF
.CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
4837 Pair
.second
->getType(), VK_LValue
,
4838 Pair
.second
->getExprLoc());
4839 Scope
.addPrivate(Pair
.first
, CGF
.EmitLValue(&DRE
).getAddress(CGF
));
4841 for (const auto &Pair
: PrivatePtrs
) {
4842 Address Replacement
= Address(
4843 CGF
.Builder
.CreateLoad(Pair
.second
),
4844 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
4845 CGF
.getContext().getDeclAlign(Pair
.first
));
4846 Scope
.addPrivate(Pair
.first
, Replacement
);
4847 if (auto *DI
= CGF
.getDebugInfo())
4848 if (CGF
.CGM
.getCodeGenOpts().hasReducedDebugInfo())
4849 (void)DI
->EmitDeclareOfAutoVariable(
4850 Pair
.first
, Pair
.second
.getPointer(), CGF
.Builder
,
4851 /*UsePointerValue*/ true);
4853 // Adjust mapping for internal locals by mapping actual memory instead of
4854 // a pointer to this memory.
4855 for (auto &Pair
: UntiedLocalVars
) {
4856 QualType VDType
= Pair
.first
->getType().getNonReferenceType();
4857 if (isAllocatableDecl(Pair
.first
)) {
4858 llvm::Value
*Ptr
= CGF
.Builder
.CreateLoad(Pair
.second
.first
);
4859 Address
Replacement(
4861 CGF
.ConvertTypeForMem(CGF
.getContext().getPointerType(VDType
)),
4862 CGF
.getPointerAlign());
4863 Pair
.second
.first
= Replacement
;
4864 Ptr
= CGF
.Builder
.CreateLoad(Replacement
);
4865 Replacement
= Address(Ptr
, CGF
.ConvertTypeForMem(VDType
),
4866 CGF
.getContext().getDeclAlign(Pair
.first
));
4867 Pair
.second
.second
= Replacement
;
4869 llvm::Value
*Ptr
= CGF
.Builder
.CreateLoad(Pair
.second
.first
);
4870 Address
Replacement(Ptr
, CGF
.ConvertTypeForMem(VDType
),
4871 CGF
.getContext().getDeclAlign(Pair
.first
));
4872 Pair
.second
.first
= Replacement
;
4876 if (Data
.Reductions
) {
4877 OMPPrivateScope
FirstprivateScope(CGF
);
4878 for (const auto &Pair
: FirstprivatePtrs
) {
4879 Address
Replacement(
4880 CGF
.Builder
.CreateLoad(Pair
.second
),
4881 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
4882 CGF
.getContext().getDeclAlign(Pair
.first
));
4883 FirstprivateScope
.addPrivate(Pair
.first
, Replacement
);
4885 (void)FirstprivateScope
.Privatize();
4886 OMPLexicalScope
LexScope(CGF
, S
, CapturedRegion
);
4887 ReductionCodeGen
RedCG(Data
.ReductionVars
, Data
.ReductionVars
,
4888 Data
.ReductionCopies
, Data
.ReductionOps
);
4889 llvm::Value
*ReductionsPtr
= CGF
.Builder
.CreateLoad(
4890 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(9)));
4891 for (unsigned Cnt
= 0, E
= Data
.ReductionVars
.size(); Cnt
< E
; ++Cnt
) {
4892 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
4893 RedCG
.emitAggregateType(CGF
, Cnt
);
4894 // FIXME: This must removed once the runtime library is fixed.
4895 // Emit required threadprivate variables for
4896 // initializer/combiner/finalizer.
4897 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
4899 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
4900 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
4902 Address(CGF
.EmitScalarConversion(
4903 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
4904 CGF
.getContext().getPointerType(
4905 Data
.ReductionCopies
[Cnt
]->getType()),
4906 Data
.ReductionCopies
[Cnt
]->getExprLoc()),
4907 CGF
.ConvertTypeForMem(Data
.ReductionCopies
[Cnt
]->getType()),
4908 Replacement
.getAlignment());
4909 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
4910 Scope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
4913 // Privatize all private variables except for in_reduction items.
4914 (void)Scope
.Privatize();
4915 SmallVector
<const Expr
*, 4> InRedVars
;
4916 SmallVector
<const Expr
*, 4> InRedPrivs
;
4917 SmallVector
<const Expr
*, 4> InRedOps
;
4918 SmallVector
<const Expr
*, 4> TaskgroupDescriptors
;
4919 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
4920 auto IPriv
= C
->privates().begin();
4921 auto IRed
= C
->reduction_ops().begin();
4922 auto ITD
= C
->taskgroup_descriptors().begin();
4923 for (const Expr
*Ref
: C
->varlists()) {
4924 InRedVars
.emplace_back(Ref
);
4925 InRedPrivs
.emplace_back(*IPriv
);
4926 InRedOps
.emplace_back(*IRed
);
4927 TaskgroupDescriptors
.emplace_back(*ITD
);
4928 std::advance(IPriv
, 1);
4929 std::advance(IRed
, 1);
4930 std::advance(ITD
, 1);
4933 // Privatize in_reduction items here, because taskgroup descriptors must be
4934 // privatized earlier.
4935 OMPPrivateScope
InRedScope(CGF
);
4936 if (!InRedVars
.empty()) {
4937 ReductionCodeGen
RedCG(InRedVars
, InRedVars
, InRedPrivs
, InRedOps
);
4938 for (unsigned Cnt
= 0, E
= InRedVars
.size(); Cnt
< E
; ++Cnt
) {
4939 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
4940 RedCG
.emitAggregateType(CGF
, Cnt
);
4941 // The taskgroup descriptor variable is always implicit firstprivate and
4942 // privatized already during processing of the firstprivates.
4943 // FIXME: This must be removed once the runtime library is fixed.
4944 // Emit required threadprivate variables for
4945 // initializer/combiner/finalizer.
4946 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
4948 llvm::Value
*ReductionsPtr
;
4949 if (const Expr
*TRExpr
= TaskgroupDescriptors
[Cnt
]) {
4950 ReductionsPtr
= CGF
.EmitLoadOfScalar(CGF
.EmitLValue(TRExpr
),
4951 TRExpr
->getExprLoc());
4953 ReductionsPtr
= llvm::ConstantPointerNull::get(CGF
.VoidPtrTy
);
4955 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
4956 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
4957 Replacement
= Address(
4958 CGF
.EmitScalarConversion(
4959 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
4960 CGF
.getContext().getPointerType(InRedPrivs
[Cnt
]->getType()),
4961 InRedPrivs
[Cnt
]->getExprLoc()),
4962 CGF
.ConvertTypeForMem(InRedPrivs
[Cnt
]->getType()),
4963 Replacement
.getAlignment());
4964 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
4965 InRedScope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
4968 (void)InRedScope
.Privatize();
4970 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII
LocalVarsScope(CGF
,
4975 llvm::Function
*OutlinedFn
= CGM
.getOpenMPRuntime().emitTaskOutlinedFunction(
4976 S
, *I
, *PartId
, *TaskT
, S
.getDirectiveKind(), CodeGen
, Data
.Tied
,
4977 Data
.NumberOfParts
);
4978 OMPLexicalScope
Scope(*this, S
, std::nullopt
,
4979 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
4980 !isOpenMPSimdDirective(S
.getDirectiveKind()));
4981 TaskGen(*this, OutlinedFn
, Data
);
/// Synthesizes an unnamed implicit firstprivate item of type \p Ty and records
/// it in \p Data.
///
/// Three unnamed ImplicitParamDecls are created inside \p CD at \p Loc: an
/// "original" variable and a "private" copy (both of type \p Ty), plus an
/// "init" variable whose type is the base element type of \p Ty. A
/// DeclRefExpr is built for each of them and appended to
/// Data.FirstprivateVars, Data.FirstprivateCopies and Data.FirstprivateInits
/// respectively.
///
/// \param C    AST context used to allocate the declarations and expressions.
/// \param Data Task data whose firstprivate lists are extended.
/// \param Ty   Type of the firstprivate item.
/// \param CD   Captured declaration that owns the new implicit parameters.
/// \param Loc  Source location attached to the new declarations.
/// \returns NOTE(review): the return statement is not visible here;
/// presumably the "original" declaration (OrigVD) is returned -- confirm.
4984 static ImplicitParamDecl
*
4985 createImplicitFirstprivateForType(ASTContext
&C
, OMPTaskDataTy
&Data
,
4986 QualType Ty
, CapturedDecl
*CD
,
4987 SourceLocation Loc
) {
// The "original" variable and an lvalue reference to it.
4988 auto *OrigVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, Ty
,
4989 ImplicitParamDecl::Other
);
4990 auto *OrigRef
= DeclRefExpr::Create(
4991 C
, NestedNameSpecifierLoc(), SourceLocation(), OrigVD
,
4992 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, Ty
, VK_LValue
);
// The private copy and an lvalue reference to it.
4993 auto *PrivateVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, Ty
,
4994 ImplicitParamDecl::Other
);
4995 auto *PrivateRef
= DeclRefExpr::Create(
4996 C
, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD
,
4997 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, Ty
, VK_LValue
);
// The init variable uses the base element type of Ty rather than Ty itself.
4998 QualType ElemType
= C
.getBaseElementType(Ty
);
4999 auto *InitVD
= ImplicitParamDecl::Create(C
, CD
, Loc
, /*Id=*/nullptr, ElemType
,
5000 ImplicitParamDecl::Other
);
5001 auto *InitRef
= DeclRefExpr::Create(
5002 C
, NestedNameSpecifierLoc(), SourceLocation(), InitVD
,
5003 /*RefersToEnclosingVariableOrCapture=*/false, Loc
, ElemType
, VK_LValue
);
// Give the private copy a C-style initializer that loads from the init
// variable (lvalue-to-rvalue conversion of InitRef).
5004 PrivateVD
->setInitStyle(VarDecl::CInit
);
5005 PrivateVD
->setInit(ImplicitCastExpr::Create(C
, ElemType
, CK_LValueToRValue
,
5006 InitRef
, /*BasePath=*/nullptr,
5007 VK_PRValue
, FPOptionsOverride()));
// Register the triple in the task data; the three lists are kept in step.
5008 Data
.FirstprivateVars
.emplace_back(OrigRef
);
5009 Data
.FirstprivateCopies
.emplace_back(PrivateRef
);
5010 Data
.FirstprivateInits
.emplace_back(InitRef
);
/// Emits a target-related directive \p S as a task.
///
/// The visible steps are:
///  1. Fetch the OMPD_task captured statement of \p S and materialize its
///     captured-variables argument.
///  2. Fill the task data with the firstprivate clause items and the
///     in_reduction clause items of \p S.
///  3. If \p InputInfo describes at least one target item, create implicit
///     firstprivate arrays for the base pointers, pointers and sizes (and for
///     the mappers when the mapper array is not a constant null pointer) and
///     bind them to the arrays currently stored in \p InputInfo.
///  4. Build the dependences of \p S.
///  5. Outline the task body. Inside it the firstprivate copies are located
///     by calling the copy function passed to the task entry, in_reduction
///     items are processed, and \p InputInfo is redirected to the task-local
///     arrays.
///  6. Emit the task call, passing an integer literal as the "if" condition
///     whose value is 1 when \p S has a nowait clause and 0 otherwise.
///
/// \param S         The directive being emitted.
/// \param BodyGen   Generator for the region body (captured by the outlined
///                  task code generator).
/// \param InputInfo Target data arrays; updated in place by the outlined
///                  code generator to refer to the privatized copies.
5014 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5015 const OMPExecutableDirective
&S
, const RegionCodeGenTy
&BodyGen
,
5016 OMPTargetDataInfo
&InputInfo
) {
5017 // Emit outlined function for task construct.
5018 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_task
);
5019 Address CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
5020 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
// Parameters of the captured declaration used below: the first one (I), the
// one right after it (PartId) and the one at offset 4 (TaskT).
5021 auto I
= CS
->getCapturedDecl()->param_begin();
5022 auto PartId
= std::next(I
);
5023 auto TaskT
= std::next(I
, 4);
5025 // The task is not final.
5026 Data
.Final
.setInt(/*IntVal=*/false);
5027 // Get list of firstprivate variables.
5028 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
5029 auto IRef
= C
->varlist_begin();
5030 auto IElemInitRef
= C
->inits().begin();
5031 for (auto *IInit
: C
->private_copies()) {
5032 Data
.FirstprivateVars
.push_back(*IRef
);
5033 Data
.FirstprivateCopies
.push_back(IInit
);
5034 Data
.FirstprivateInits
.push_back(*IElemInitRef
);
// Gather the in_reduction clause items; LHSs/RHSs collect the clause's
// lhs/rhs helper expressions.
5039 SmallVector
<const Expr
*, 4> LHSs
;
5040 SmallVector
<const Expr
*, 4> RHSs
;
5041 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
5042 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
5043 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
5044 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
5045 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
5046 C
->reduction_ops().end());
5047 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5048 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
// Implicit firstprivate declarations for the target data arrays. They stay
// null unless there is at least one target item (and, for MVD, unless the
// mapper array needs privatization).
5050 OMPPrivateScope
TargetScope(*this);
5051 VarDecl
*BPVD
= nullptr;
5052 VarDecl
*PVD
= nullptr;
5053 VarDecl
*SVD
= nullptr;
5054 VarDecl
*MVD
= nullptr;
5055 if (InputInfo
.NumberOfTargetItems
> 0) {
5056 auto *CD
= CapturedDecl::Create(
5057 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
// Array of NumberOfTargetItems void pointers; shared by the base-pointer,
// pointer and mapper arrays.
5058 llvm::APInt
ArrSize(/*numBits=*/32, InputInfo
.NumberOfTargetItems
);
5059 QualType BaseAndPointerAndMapperType
= getContext().getConstantArrayType(
5060 getContext().VoidPtrTy
, ArrSize
, nullptr, ArrayType::Normal
,
5061 /*IndexTypeQuals=*/0);
5062 BPVD
= createImplicitFirstprivateForType(
5063 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5064 PVD
= createImplicitFirstprivateForType(
5065 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
// The sizes array holds signed 64-bit integers.
5066 QualType SizesType
= getContext().getConstantArrayType(
5067 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5068 ArrSize
, nullptr, ArrayType::Normal
,
5069 /*IndexTypeQuals=*/0);
5070 SVD
= createImplicitFirstprivateForType(getContext(), Data
, SizesType
, CD
,
5072 TargetScope
.addPrivate(BPVD
, InputInfo
.BasePointersArray
);
5073 TargetScope
.addPrivate(PVD
, InputInfo
.PointersArray
);
5074 TargetScope
.addPrivate(SVD
, InputInfo
.SizesArray
);
5075 // If there is no user-defined mapper, the mapper array will be nullptr. In
5076 // this case, we don't need to privatize it.
5077 if (!isa_and_nonnull
<llvm::ConstantPointerNull
>(
5078 InputInfo
.MappersArray
.getPointer())) {
5079 MVD
= createImplicitFirstprivateForType(
5080 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5081 TargetScope
.addPrivate(MVD
, InputInfo
.MappersArray
);
5084 (void)TargetScope
.Privatize();
5085 buildDependences(S
, Data
);
// Code generator for the outlined task body. Data, S, BodyGen and InputInfo
// are captured by reference; CS and the four declarations by value.
5086 auto &&CodeGen
= [&Data
, &S
, CS
, &BodyGen
, BPVD
, PVD
, SVD
, MVD
,
5087 &InputInfo
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
5088 // Set proper addresses for generated private copies.
5089 OMPPrivateScope
Scope(CGF
);
5090 if (!Data
.FirstprivateVars
.empty()) {
// Indices of the privates pointer and of the copy function among the
// parameters of the captured declaration.
5091 enum { PrivatesParam
= 2, CopyFnParam
= 3 };
5092 llvm::Value
*CopyFn
= CGF
.Builder
.CreateLoad(
5093 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(CopyFnParam
)));
5094 llvm::Value
*PrivatesPtr
= CGF
.Builder
.CreateLoad(CGF
.GetAddrOfLocalVar(
5095 CS
->getCapturedDecl()->getParam(PrivatesParam
)));
// Build the call to the copy function: the privates pointer first, then
// one temporary per firstprivate variable that receives a pointer to it.
5097 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> PrivatePtrs
;
5098 llvm::SmallVector
<llvm::Value
*, 16> CallArgs
;
5099 llvm::SmallVector
<llvm::Type
*, 4> ParamTypes
;
5100 CallArgs
.push_back(PrivatesPtr
);
5101 ParamTypes
.push_back(PrivatesPtr
->getType());
5102 for (const Expr
*E
: Data
.FirstprivateVars
) {
5103 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
5104 Address PrivatePtr
=
5105 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
5106 ".firstpriv.ptr.addr");
5107 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
5108 CallArgs
.push_back(PrivatePtr
.getPointer());
5109 ParamTypes
.push_back(PrivatePtr
.getType());
// Cast the loaded copy function to the signature assembled above and call it.
5111 auto *CopyFnTy
= llvm::FunctionType::get(CGF
.Builder
.getVoidTy(),
5112 ParamTypes
, /*isVarArg=*/false);
5113 CopyFn
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
5114 CopyFn
, CopyFnTy
->getPointerTo());
5115 CGF
.CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(
5116 CGF
, S
.getBeginLoc(), {CopyFnTy
, CopyFn
}, CallArgs
);
// Map every firstprivate variable to the address loaded from its temporary.
5117 for (const auto &Pair
: PrivatePtrs
) {
5118 Address
Replacement(
5119 CGF
.Builder
.CreateLoad(Pair
.second
),
5120 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
5121 CGF
.getContext().getDeclAlign(Pair
.first
));
5122 Scope
.addPrivate(Pair
.first
, Replacement
);
5125 CGF
.processInReduction(S
, Data
, CGF
, CS
, Scope
);
// Point InputInfo at the first element of each privatized array.
5126 if (InputInfo
.NumberOfTargetItems
> 0) {
5127 InputInfo
.BasePointersArray
= CGF
.Builder
.CreateConstArrayGEP(
5128 CGF
.GetAddrOfLocalVar(BPVD
), /*Index=*/0);
5129 InputInfo
.PointersArray
= CGF
.Builder
.CreateConstArrayGEP(
5130 CGF
.GetAddrOfLocalVar(PVD
), /*Index=*/0);
5131 InputInfo
.SizesArray
= CGF
.Builder
.CreateConstArrayGEP(
5132 CGF
.GetAddrOfLocalVar(SVD
), /*Index=*/0);
5133 // If MVD is nullptr, the mapper array is not privatized
5135 InputInfo
.MappersArray
= CGF
.Builder
.CreateConstArrayGEP(
5136 CGF
.GetAddrOfLocalVar(MVD
), /*Index=*/0);
5140 OMPLexicalScope
LexScope(CGF
, S
, OMPD_task
, /*EmitPreInitStmt=*/false);
// Outline the task body; the task is always emitted as tied here.
5143 llvm::Function
*OutlinedFn
= CGM
.getOpenMPRuntime().emitTaskOutlinedFunction(
5144 S
, *I
, *PartId
, *TaskT
, S
.getDirectiveKind(), CodeGen
, /*Tied=*/true,
5145 Data
.NumberOfParts
);
// Synthesize the "if" condition: literal 1 with a nowait clause, 0 without.
5146 llvm::APInt
TrueOrFalse(32, S
.hasClausesOfKind
<OMPNowaitClause
>() ? 1 : 0);
5147 IntegerLiteral
IfCond(getContext(), TrueOrFalse
,
5148 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5150 CGM
.getOpenMPRuntime().emitTaskCall(*this, S
.getBeginLoc(), S
, OutlinedFn
,
5151 SharedsTy
, CapturedStruct
, &IfCond
, Data
);
/// Privatizes the reduction items of a task-based directive inside its
/// outlined body.
///
/// When \p Data.Reductions is set, every entry of Data.ReductionVars is
/// mapped in \p Scope to the item returned by the runtime's
/// getTaskReductionItem, using the reductions pointer loaded from parameter 4
/// of the captured declaration of \p CS. \p Scope is then privatized.
/// Afterwards the in_reduction clause items of \p S are handled the same way
/// in a separate private scope, taking the reductions pointer from the
/// clause's taskgroup descriptor when one is present and a null pointer
/// otherwise.
///
/// \param S     Directive whose clauses are inspected.
/// \param Data  Task data holding the reduction lists.
/// \param CGF   Function in which the code is emitted.
/// \param CS    Captured statement of the task region.
/// \param Scope Private scope that receives the reduction replacements.
5154 void CodeGenFunction::processInReduction(const OMPExecutableDirective
&S
,
5155 OMPTaskDataTy
&Data
,
5156 CodeGenFunction
&CGF
,
5157 const CapturedStmt
*CS
,
5158 OMPPrivateScope
&Scope
) {
5159 if (Data
.Reductions
) {
5160 OpenMPDirectiveKind CapturedRegion
= S
.getDirectiveKind();
5161 OMPLexicalScope
LexScope(CGF
, S
, CapturedRegion
);
// Data.ReductionVars is passed for both of the first two arguments.
5162 ReductionCodeGen
RedCG(Data
.ReductionVars
, Data
.ReductionVars
,
5163 Data
.ReductionCopies
, Data
.ReductionOps
);
// The reductions pointer is parameter 4 of the captured declaration.
5164 llvm::Value
*ReductionsPtr
= CGF
.Builder
.CreateLoad(
5165 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(4)));
5166 for (unsigned Cnt
= 0, E
= Data
.ReductionVars
.size(); Cnt
< E
; ++Cnt
) {
5167 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
5168 RedCG
.emitAggregateType(CGF
, Cnt
);
5169 // FIXME: This must be removed once the runtime library is fixed.
5170 // Emit required threadprivate variables for
5171 // initializer/combiner/finalizer.
5172 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
// Ask the runtime for the reduction item of this shared variable.
5174 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5175 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
// Convert the returned void pointer to a pointer to the private copy's type,
// keeping the alignment reported for the item.
5177 Address(CGF
.EmitScalarConversion(
5178 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
5179 CGF
.getContext().getPointerType(
5180 Data
.ReductionCopies
[Cnt
]->getType()),
5181 Data
.ReductionCopies
[Cnt
]->getExprLoc()),
5182 CGF
.ConvertTypeForMem(Data
.ReductionCopies
[Cnt
]->getType()),
5183 Replacement
.getAlignment());
5184 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5185 Scope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5188 (void)Scope
.Privatize();
// Flatten all in_reduction clauses into parallel lists: variable, private
// copy, reduction operation and taskgroup descriptor.
5189 SmallVector
<const Expr
*, 4> InRedVars
;
5190 SmallVector
<const Expr
*, 4> InRedPrivs
;
5191 SmallVector
<const Expr
*, 4> InRedOps
;
5192 SmallVector
<const Expr
*, 4> TaskgroupDescriptors
;
5193 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
5194 auto IPriv
= C
->privates().begin();
5195 auto IRed
= C
->reduction_ops().begin();
5196 auto ITD
= C
->taskgroup_descriptors().begin();
5197 for (const Expr
*Ref
: C
->varlists()) {
5198 InRedVars
.emplace_back(Ref
);
5199 InRedPrivs
.emplace_back(*IPriv
);
5200 InRedOps
.emplace_back(*IRed
);
5201 TaskgroupDescriptors
.emplace_back(*ITD
);
5202 std::advance(IPriv
, 1);
5203 std::advance(IRed
, 1);
5204 std::advance(ITD
, 1);
// in_reduction items get their own private scope, set up after Scope above
// has already been privatized.
5207 OMPPrivateScope
InRedScope(CGF
);
5208 if (!InRedVars
.empty()) {
5209 ReductionCodeGen
RedCG(InRedVars
, InRedVars
, InRedPrivs
, InRedOps
);
5210 for (unsigned Cnt
= 0, E
= InRedVars
.size(); Cnt
< E
; ++Cnt
) {
5211 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
5212 RedCG
.emitAggregateType(CGF
, Cnt
);
5213 // FIXME: This must be removed once the runtime library is fixed.
5214 // Emit required threadprivate variables for
5215 // initializer/combiner/finalizer.
5216 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
// Use the taskgroup descriptor's value when the clause provides one;
// otherwise pass a null reductions pointer to the runtime.
5218 llvm::Value
*ReductionsPtr
;
5219 if (const Expr
*TRExpr
= TaskgroupDescriptors
[Cnt
]) {
5221 CGF
.EmitLoadOfScalar(CGF
.EmitLValue(TRExpr
), TRExpr
->getExprLoc());
5223 ReductionsPtr
= llvm::ConstantPointerNull::get(CGF
.VoidPtrTy
);
5225 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5226 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
5227 Replacement
= Address(
5228 CGF
.EmitScalarConversion(
5229 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
5230 CGF
.getContext().getPointerType(InRedPrivs
[Cnt
]->getType()),
5231 InRedPrivs
[Cnt
]->getExprLoc()),
5232 CGF
.ConvertTypeForMem(InRedPrivs
[Cnt
]->getType()),
5233 Replacement
.getAlignment());
5234 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5235 InRedScope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5238 (void)InRedScope
.Privatize();
/// Emits code for '#pragma omp task'.
///
/// Selects the 'if' clause condition that applies to the task (a clause with
/// no name modifier or with the 'task' modifier), records whether the task is
/// tied (no 'untied' clause), and delegates to EmitOMPTaskBasedDirective with
/// a body generator that emits the captured statement and a task generator
/// that issues the runtime task call.
5241 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective
&S
) {
5242 // Emit outlined function for task construct.
5243 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_task
);
5244 Address CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
5245 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
// Only an 'if' clause without a name modifier or with the 'task' modifier
// provides the condition.
5246 const Expr
*IfCond
= nullptr;
5247 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
5248 if (C
->getNameModifier() == OMPD_unknown
||
5249 C
->getNameModifier() == OMPD_task
) {
5250 IfCond
= C
->getCondition();
5256 // Check if we should emit tied or untied task.
5257 Data
.Tied
= !S
.getSingleClause
<OMPUntiedClause
>();
// Body generator: emits the statement captured by the task region.
5258 auto &&BodyGen
= [CS
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5259 CGF
.EmitStmt(CS
->getCapturedStmt());
// Task generator: emits the runtime call for the outlined function.
5261 auto &&TaskGen
= [&S
, SharedsTy
, CapturedStruct
,
5262 IfCond
](CodeGenFunction
&CGF
, llvm::Function
*OutlinedFn
,
5263 const OMPTaskDataTy
&Data
) {
5264 CGF
.CGM
.getOpenMPRuntime().emitTaskCall(CGF
, S
.getBeginLoc(), S
, OutlinedFn
,
5265 SharedsTy
, CapturedStruct
, IfCond
,
// NOTE(review): the declaration that receives the result of disable() is not
// visible here; presumably it is an RAII object kept alive for the emission
// below -- confirm.
5269 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
5270 EmitOMPTaskBasedDirective(S
, OMPD_task
, BodyGen
, TaskGen
, Data
);
/// Emits code for '#pragma omp taskyield' by forwarding to the OpenMP
/// runtime's emitTaskyieldCall at the directive's start location.
5273 void CodeGenFunction::EmitOMPTaskyieldDirective(
5274 const OMPTaskyieldDirective
&S
) {
5275 CGM
.getOpenMPRuntime().emitTaskyieldCall(*this, S
.getBeginLoc());
/// Emits code for '#pragma omp error'.
///
/// Passes the message string of the 'message' clause (or nullptr when the
/// clause is absent) and a fatality flag to the runtime's emitErrorCall.
5278 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective
&S
) {
5279 const OMPMessageClause
*MC
= S
.getSingleClause
<OMPMessageClause
>();
5280 Expr
*ME
= MC
? MC
->getMessageString() : nullptr;
5281 const OMPSeverityClause
*SC
= S
.getSingleClause
<OMPSeverityClause
>();
5282 bool IsFatal
= false;
// The condition holds when there is no 'severity' clause or its kind is
// 'fatal'. NOTE(review): the guarded statement is not visible here;
// presumably it sets IsFatal to true -- confirm.
5283 if (!SC
|| SC
->getSeverityKind() == OMPC_SEVERITY_fatal
)
5285 CGM
.getOpenMPRuntime().emitErrorCall(*this, S
.getBeginLoc(), ME
, IsFatal
);
/// Emits code for '#pragma omp barrier' by forwarding to the OpenMP runtime's
/// emitBarrierCall with the OMPD_barrier kind.
5288 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective
&S
) {
5289 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_barrier
);
/// Emits code for '#pragma omp taskwait'.
///
/// Collects the directive's dependences and whether it carries a 'nowait'
/// clause into the task data, then forwards to the runtime's
/// emitTaskwaitCall.
5292 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective
&S
) {
5294 // Build list of dependences
5295 buildDependences(S
, Data
);
5296 Data
.HasNowaitClause
= S
.hasClausesOfKind
<OMPNowaitClause
>();
5297 CGM
.getOpenMPRuntime().emitTaskwaitCall(*this, S
.getBeginLoc(), Data
);
/// Returns true when the taskgroup directive \p T has no clauses; the
/// OpenMPIRBuilder path of EmitOMPTaskgroupDirective is only taken for such
/// directives.
5300 bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective
&T
) {
5301 return T
.clauses().empty();
/// Emits code for '#pragma omp taskgroup'.
///
/// Two paths are visible:
///  - With the OpenMPIRBuilder language option enabled and a directive that
///    isSupportedByOpenMPIRBuilder, the region is created through
///    OpenMPIRBuilder::createTaskgroup with a callback that emits the
///    innermost captured statement.
///  - Otherwise a region code generator is handed to the runtime's
///    emitTaskgroupRegion. If the directive has a reduction reference, the
///    task_reduction clause items are collected, the runtime's
///    emitTaskReductionInit is called, and its result is stored into the
///    variable named by the reduction reference before the body is emitted.
5304 void CodeGenFunction::EmitOMPTaskgroupDirective(
5305 const OMPTaskgroupDirective
&S
) {
5306 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5307 if (CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
)) {
5308 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
5309 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
// Allocas go at the function's alloca insertion point.
5310 InsertPointTy
AllocaIP(AllocaInsertPt
->getParent(),
5311 AllocaInsertPt
->getIterator());
// Body callback: continue emission at the insertion point supplied by the
// builder and emit the innermost captured statement there.
5313 auto BodyGenCB
= [&, this](InsertPointTy AllocaIP
,
5314 InsertPointTy CodeGenIP
) {
5315 Builder
.restoreIP(CodeGenIP
);
5316 EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
// Install a local captured-statement info object if none is active.
5318 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo
;
5319 if (!CapturedStmtInfo
)
5320 CapturedStmtInfo
= &CapStmtInfo
;
5321 Builder
.restoreIP(OMPBuilder
.createTaskgroup(Builder
, AllocaIP
, BodyGenCB
));
// Region code generator used when the taskgroup goes through the runtime.
5324 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
5326 if (const Expr
*E
= S
.getReductionRef()) {
5327 SmallVector
<const Expr
*, 4> LHSs
;
5328 SmallVector
<const Expr
*, 4> RHSs
;
5330 for (const auto *C
: S
.getClausesOfKind
<OMPTaskReductionClause
>()) {
5331 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
5332 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
5333 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
5334 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
5335 C
->reduction_ops().end());
5336 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5337 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5339 llvm::Value
*ReductionDesc
=
5340 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionInit(CGF
, S
.getBeginLoc(),
// Emit the variable named by the reduction reference and store the value
// returned by emitTaskReductionInit into it.
5342 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
5343 CGF
.EmitVarDecl(*VD
);
5344 CGF
.EmitStoreOfScalar(ReductionDesc
, CGF
.GetAddrOfLocalVar(VD
),
5345 /*Volatile=*/false, E
->getType());
5347 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
5349 CGM
.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen
, S
.getBeginLoc());
/// Emits code for '#pragma omp flush'.
///
/// The atomic ordering passed to the runtime is NotAtomic when the directive
/// has a flush clause and AcquireRelease otherwise. The expression list
/// handed to emitFlush is the flush clause's variable list, or an empty list
/// when there is no such clause.
5352 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective
&S
) {
5353 llvm::AtomicOrdering AO
= S
.getSingleClause
<OMPFlushClause
>()
5354 ? llvm::AtomicOrdering::NotAtomic
5355 : llvm::AtomicOrdering::AcquireRelease
;
5356 CGM
.getOpenMPRuntime().emitFlush(
// Immediately-computed list of flushed variables (empty without a clause).
5358 [&S
]() -> ArrayRef
<const Expr
*> {
5359 if (const auto *FlushClause
= S
.getSingleClause
<OMPFlushClause
>())
5360 return llvm::ArrayRef(FlushClause
->varlist_begin(),
5361 FlushClause
->varlist_end());
5362 return std::nullopt
;
5364 S
.getBeginLoc(), AO
);
/// Emits code for '#pragma omp depobj'.
///
/// Evaluates the depobj expression as an lvalue and then handles whichever of
/// the following clauses is present:
///  - depend:  builds the dependency through emitDepobjDependClause and
///             stores the resulting pointer into the depobj lvalue;
///  - destroy: forwards to emitDestroyClause;
///  - update:  forwards to emitUpdateClause with the clause's dependency kind.
5367 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective
&S
) {
// NOTE(review): DO is dereferenced without a null check, so this assumes the
// directive always carries a depobj clause -- confirm.
5368 const auto *DO
= S
.getSingleClause
<OMPDepobjClause
>();
5369 LValue DOLVal
= EmitLValue(DO
->getDepobj());
5370 if (const auto *DC
= S
.getSingleClause
<OMPDependClause
>()) {
5371 OMPTaskDataTy::DependData
Dependencies(DC
->getDependencyKind(),
5373 Dependencies
.DepExprs
.append(DC
->varlist_begin(), DC
->varlist_end());
5374 Address DepAddr
= CGM
.getOpenMPRuntime().emitDepobjDependClause(
5375 *this, Dependencies
, DC
->getBeginLoc());
5376 EmitStoreOfScalar(DepAddr
.getPointer(), DOLVal
);
5379 if (const auto *DC
= S
.getSingleClause
<OMPDestroyClause
>()) {
5380 CGM
.getOpenMPRuntime().emitDestroyClause(*this, DOLVal
, DC
->getBeginLoc());
5383 if (const auto *UC
= S
.getSingleClause
<OMPUpdateClause
>()) {
5384 CGM
.getOpenMPRuntime().emitUpdateClause(
5385 *this, DOLVal
, UC
->getDependencyKind(), UC
->getBeginLoc());
/// Emits code for '#pragma omp scan' inside a loop directive with inscan
/// reductions.
///
/// The enclosing loop directive is taken from OMPParentLoopDirectiveForScan.
/// The data of its reduction clauses that use the 'inscan' modifier (shared
/// variables, private copies, LHS/RHS helpers, reduction and copy operations,
/// copy-array temporaries and elements) is gathered first.
///
/// Two code generation schemes follow:
///  - For a simd parent, or a simd-based parent in simd-only mode, temporary
///    copies, a simple reduction and copy-back code are emitted in the
///    "omp.inscan.reduce" block, with the dispatch depending on whether the
///    scan is inclusive or exclusive.
///  - Otherwise the scheme relies on a buffer indexed by the loop iteration
///    variable: when OMPFirstScanLoop is set, "buffer[i] = red" is emitted at
///    the end of the input phase; when it is not set, "red = buffer[i]" is
///    emitted at the entrance to the scan phase, using index i - 1 for
///    exclusive scans.
5390 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective
&S
) {
// Guard for a missing parent loop directive; the parent is dereferenced
// right below.
5391 if (!OMPParentLoopDirectiveForScan
)
5393 const OMPExecutableDirective
&ParentDir
= *OMPParentLoopDirectiveForScan
;
5394 bool IsInclusive
= S
.hasClausesOfKind
<OMPInclusiveClause
>();
// Parallel lists describing every inscan reduction item of the parent.
5395 SmallVector
<const Expr
*, 4> Shareds
;
5396 SmallVector
<const Expr
*, 4> Privates
;
5397 SmallVector
<const Expr
*, 4> LHSs
;
5398 SmallVector
<const Expr
*, 4> RHSs
;
5399 SmallVector
<const Expr
*, 4> ReductionOps
;
5400 SmallVector
<const Expr
*, 4> CopyOps
;
5401 SmallVector
<const Expr
*, 4> CopyArrayTemps
;
5402 SmallVector
<const Expr
*, 4> CopyArrayElems
;
5403 for (const auto *C
: ParentDir
.getClausesOfKind
<OMPReductionClause
>()) {
// Only reduction clauses with the 'inscan' modifier are of interest.
5404 if (C
->getModifier() != OMPC_REDUCTION_inscan
)
5406 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
5407 Privates
.append(C
->privates().begin(), C
->privates().end());
5408 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5409 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5410 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
5411 CopyOps
.append(C
->copy_ops().begin(), C
->copy_ops().end());
5412 CopyArrayTemps
.append(C
->copy_array_temps().begin(),
5413 C
->copy_array_temps().end());
5414 CopyArrayElems
.append(C
->copy_array_elems().begin(),
5415 C
->copy_array_elems().end());
5417 if (ParentDir
.getDirectiveKind() == OMPD_simd
||
5418 (getLangOpts().OpenMPSimd
&&
5419 isOpenMPSimdDirective(ParentDir
.getDirectiveKind()))) {
5420 // For simd directive and simd-based directives in simd only mode, use the
5421 // following codegen:
5423 // #pragma omp simd reduction(inscan, +: x)
5426 // #pragma omp scan inclusive(x)
5429 // is transformed to:
5440 // #pragma omp simd reduction(inscan, +: x)
5443 // #pragma omp scan exclusive(x)
5456 llvm::BasicBlock
*OMPScanReduce
= createBasicBlock("omp.inscan.reduce");
5457 EmitBranch(IsInclusive
5459 : BreakContinueStack
.back().ContinueBlock
.getBlock());
5460 EmitBlock(OMPScanDispatch
);
5462 // New scope for correct construction/destruction of temp variables for
5464 LexicalScope
Scope(*this, S
.getSourceRange());
5465 EmitBranch(IsInclusive
? OMPBeforeScanBlock
: OMPAfterScanBlock
);
5466 EmitBlock(OMPScanReduce
);
5468 // Create temp var and copy LHS value to this temp value.
5470 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5471 const Expr
*PrivateExpr
= Privates
[I
];
5472 const Expr
*TempExpr
= CopyArrayTemps
[I
];
5474 *cast
<VarDecl
>(cast
<DeclRefExpr
>(TempExpr
)->getDecl()));
5475 LValue DestLVal
= EmitLValue(TempExpr
);
5476 LValue SrcLVal
= EmitLValue(LHSs
[I
]);
5477 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5478 SrcLVal
.getAddress(*this),
5479 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5480 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
// Combine the items with a simple, nowait reduction of kind OMPD_simd.
5484 CGM
.getOpenMPRuntime().emitReduction(
5485 *this, ParentDir
.getEndLoc(), Privates
, LHSs
, RHSs
, ReductionOps
,
5486 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd
});
// Copy into the RHS helper of each item, either from its LHS helper or from
// the temporary created above. NOTE(review): the condition selecting between
// the two sources is not visible here -- presumably IsInclusive; confirm.
5487 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5488 const Expr
*PrivateExpr
= Privates
[I
];
5492 DestLVal
= EmitLValue(RHSs
[I
]);
5493 SrcLVal
= EmitLValue(LHSs
[I
]);
5495 const Expr
*TempExpr
= CopyArrayTemps
[I
];
5496 DestLVal
= EmitLValue(RHSs
[I
]);
5497 SrcLVal
= EmitLValue(TempExpr
);
5499 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5500 SrcLVal
.getAddress(*this),
5501 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5502 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5506 EmitBranch(IsInclusive
? OMPAfterScanBlock
: OMPBeforeScanBlock
);
5507 OMPScanExitBlock
= IsInclusive
5508 ? BreakContinueStack
.back().ContinueBlock
.getBlock()
5510 EmitBlock(OMPAfterScanBlock
);
5514 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5515 EmitBlock(OMPScanExitBlock
);
5517 if (OMPFirstScanLoop
) {
5518 // Emit buffer[i] = red; at the end of the input phase.
5519 const auto *IVExpr
= cast
<OMPLoopDirective
>(ParentDir
)
5520 .getIterationVariable()
5521 ->IgnoreParenImpCasts();
5522 LValue IdxLVal
= EmitLValue(IVExpr
);
5523 llvm::Value
*IdxVal
= EmitLoadOfScalar(IdxLVal
, IVExpr
->getExprLoc());
// The iteration variable's value is cast to SizeTy as an unsigned quantity.
5524 IdxVal
= Builder
.CreateIntCast(IdxVal
, SizeTy
, /*isSigned=*/false);
5525 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5526 const Expr
*PrivateExpr
= Privates
[I
];
5527 const Expr
*OrigExpr
= Shareds
[I
];
5528 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
// Bind the opaque index of the copy-array subscript to the current index.
5529 OpaqueValueMapping
IdxMapping(
5531 cast
<OpaqueValueExpr
>(
5532 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
5533 RValue::get(IdxVal
));
5534 LValue DestLVal
= EmitLValue(CopyArrayElem
);
5535 LValue SrcLVal
= EmitLValue(OrigExpr
);
5536 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5537 SrcLVal
.getAddress(*this),
5538 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5539 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5543 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5545 EmitBlock(OMPScanExitBlock
);
5546 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5548 EmitBlock(OMPScanDispatch
);
5549 if (!OMPFirstScanLoop
) {
5550 // Emit red = buffer[i]; at the entrance to the scan phase.
5551 const auto *IVExpr
= cast
<OMPLoopDirective
>(ParentDir
)
5552 .getIterationVariable()
5553 ->IgnoreParenImpCasts();
5554 LValue IdxLVal
= EmitLValue(IVExpr
);
5555 llvm::Value
*IdxVal
= EmitLoadOfScalar(IdxLVal
, IVExpr
->getExprLoc());
5556 IdxVal
= Builder
.CreateIntCast(IdxVal
, SizeTy
, /*isSigned=*/false);
// When the index is zero, control branches to the exit block past the copy
// loop; otherwise it continues in "omp.exclusive.dec".
5557 llvm::BasicBlock
*ExclusiveExitBB
= nullptr;
5559 llvm::BasicBlock
*ContBB
= createBasicBlock("omp.exclusive.dec");
5560 ExclusiveExitBB
= createBasicBlock("omp.exclusive.copy.exit");
5561 llvm::Value
*Cmp
= Builder
.CreateIsNull(IdxVal
);
5562 Builder
.CreateCondBr(Cmp
, ExclusiveExitBB
, ContBB
);
5564 // Use idx - 1 iteration for exclusive scan.
5565 IdxVal
= Builder
.CreateNUWSub(IdxVal
, llvm::ConstantInt::get(SizeTy
, 1));
5567 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5568 const Expr
*PrivateExpr
= Privates
[I
];
5569 const Expr
*OrigExpr
= Shareds
[I
];
5570 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
5571 OpaqueValueMapping
IdxMapping(
5573 cast
<OpaqueValueExpr
>(
5574 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
5575 RValue::get(IdxVal
));
// Source and destination are swapped relative to the input phase: the copy
// goes from the buffer element into the original variable.
5576 LValue SrcLVal
= EmitLValue(CopyArrayElem
);
5577 LValue DestLVal
= EmitLValue(OrigExpr
);
5578 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5579 SrcLVal
.getAddress(*this),
5580 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5581 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5585 EmitBlock(ExclusiveExitBB
);
// Branch to the "before scan" block when OMPFirstScanLoop and IsInclusive
// agree, and to the "after scan" block otherwise.
5588 EmitBranch((OMPFirstScanLoop
== IsInclusive
) ? OMPBeforeScanBlock
5589 : OMPAfterScanBlock
);
5590 EmitBlock(OMPAfterScanBlock
);
5593 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective
&S
,
5594 const CodeGenLoopTy
&CodeGenLoop
,
5596 // Emit the loop iteration variable.
5597 const auto *IVExpr
= cast
<DeclRefExpr
>(S
.getIterationVariable());
5598 const auto *IVDecl
= cast
<VarDecl
>(IVExpr
->getDecl());
5599 EmitVarDecl(*IVDecl
);
5601 // Emit the iterations count variable.
5602 // If it is not a variable, Sema decided to calculate iterations count on each
5603 // iteration (e.g., it is foldable into a constant).
5604 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
5605 EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
5606 // Emit calculation of the iterations count.
5607 EmitIgnoredExpr(S
.getCalcLastIteration());
5610 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
5612 bool HasLastprivateClause
= false;
5613 // Check pre-condition.
5615 OMPLoopScope
PreInitScope(*this, S
);
5616 // Skip the entire loop if we don't meet the precondition.
5617 // If the condition constant folds and can be elided, avoid emitting the
5620 llvm::BasicBlock
*ContBlock
= nullptr;
5621 if (ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
5625 llvm::BasicBlock
*ThenBlock
= createBasicBlock("omp.precond.then");
5626 ContBlock
= createBasicBlock("omp.precond.end");
5627 emitPreCond(*this, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
5628 getProfileCount(&S
));
5629 EmitBlock(ThenBlock
);
5630 incrementProfileCounter(&S
);
5633 emitAlignedClause(*this, S
);
5634 // Emit 'then' code.
5636 // Emit helper vars inits.
5638 LValue LB
= EmitOMPHelperVar(
5639 *this, cast
<DeclRefExpr
>(
5640 (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5641 ? S
.getCombinedLowerBoundVariable()
5642 : S
.getLowerBoundVariable())));
5643 LValue UB
= EmitOMPHelperVar(
5644 *this, cast
<DeclRefExpr
>(
5645 (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5646 ? S
.getCombinedUpperBoundVariable()
5647 : S
.getUpperBoundVariable())));
5649 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getStrideVariable()));
5651 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()));
5653 OMPPrivateScope
LoopScope(*this);
5654 if (EmitOMPFirstprivateClause(S
, LoopScope
)) {
5655 // Emit implicit barrier to synchronize threads and avoid data races
5656 // on initialization of firstprivate variables and post-update of
5657 // lastprivate variables.
5658 CGM
.getOpenMPRuntime().emitBarrierCall(
5659 *this, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
5660 /*ForceSimpleCall=*/true);
5662 EmitOMPPrivateClause(S
, LoopScope
);
5663 if (isOpenMPSimdDirective(S
.getDirectiveKind()) &&
5664 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
5665 !isOpenMPTeamsDirective(S
.getDirectiveKind()))
5666 EmitOMPReductionClauseInit(S
, LoopScope
);
5667 HasLastprivateClause
= EmitOMPLastprivateClauseInit(S
, LoopScope
);
5668 EmitOMPPrivateLoopCounters(S
, LoopScope
);
5669 (void)LoopScope
.Privatize();
5670 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
5671 CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S
);
5673 // Detect the distribute schedule kind and chunk.
5674 llvm::Value
*Chunk
= nullptr;
5675 OpenMPDistScheduleClauseKind ScheduleKind
= OMPC_DIST_SCHEDULE_unknown
;
5676 if (const auto *C
= S
.getSingleClause
<OMPDistScheduleClause
>()) {
5677 ScheduleKind
= C
->getDistScheduleKind();
5678 if (const Expr
*Ch
= C
->getChunkSize()) {
5679 Chunk
= EmitScalarExpr(Ch
);
5680 Chunk
= EmitScalarConversion(Chunk
, Ch
->getType(),
5681 S
.getIterationVariable()->getType(),
5685 // Default behaviour for dist_schedule clause.
5686 CGM
.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5687 *this, S
, ScheduleKind
, Chunk
);
5689 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
5690 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
5692 // OpenMP [2.10.8, distribute Construct, Description]
5693 // If dist_schedule is specified, kind must be static. If specified,
5694 // iterations are divided into chunks of size chunk_size, chunks are
5695 // assigned to the teams of the league in a round-robin fashion in the
5696 // order of the team number. When no chunk_size is specified, the
5697 // iteration space is divided into chunks that are approximately equal
5698 // in size, and at most one chunk is distributed to each team of the
5699 // league. The size of the chunks is unspecified in this case.
5700 bool StaticChunked
=
5701 RT
.isStaticChunked(ScheduleKind
, /* Chunked */ Chunk
!= nullptr) &&
5702 isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind());
5703 if (RT
.isStaticNonchunked(ScheduleKind
,
5704 /* Chunked */ Chunk
!= nullptr) ||
5706 CGOpenMPRuntime::StaticRTInput
StaticInit(
5707 IVSize
, IVSigned
, /* Ordered = */ false, IL
.getAddress(*this),
5708 LB
.getAddress(*this), UB
.getAddress(*this), ST
.getAddress(*this),
5709 StaticChunked
? Chunk
: nullptr);
5710 RT
.emitDistributeStaticInit(*this, S
.getBeginLoc(), ScheduleKind
,
5713 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5714 // UB = min(UB, GlobalUB);
5715 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5716 ? S
.getCombinedEnsureUpperBound()
5717 : S
.getEnsureUpperBound());
5719 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5720 ? S
.getCombinedInit()
5724 isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5725 ? S
.getCombinedCond()
5729 Cond
= S
.getCombinedDistCond();
5731 // For static unchunked schedules generate:
5733 // 1. For distribute alone, codegen
5734 // while (idx <= UB) {
5739 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5740 // while (idx <= UB) {
5741 // <CodeGen rest of pragma>(LB, UB);
5745 // For static chunk one schedule generate:
5747 // while (IV <= GlobalUB) {
5748 // <CodeGen rest of pragma>(LB, UB);
5751 // UB = min(UB, GlobalUB);
5757 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5758 if (isOpenMPSimdDirective(S
.getDirectiveKind()))
5759 CGF
.EmitOMPSimdInit(S
);
5761 [&S
, &LoopScope
, Cond
, IncExpr
, LoopExit
, &CodeGenLoop
,
5762 StaticChunked
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5763 CGF
.EmitOMPInnerLoop(
5764 S
, LoopScope
.requiresCleanups(), Cond
, IncExpr
,
5765 [&S
, LoopExit
, &CodeGenLoop
](CodeGenFunction
&CGF
) {
5766 CodeGenLoop(CGF
, S
, LoopExit
);
5768 [&S
, StaticChunked
](CodeGenFunction
&CGF
) {
5769 if (StaticChunked
) {
5770 CGF
.EmitIgnoredExpr(S
.getCombinedNextLowerBound());
5771 CGF
.EmitIgnoredExpr(S
.getCombinedNextUpperBound());
5772 CGF
.EmitIgnoredExpr(S
.getCombinedEnsureUpperBound());
5773 CGF
.EmitIgnoredExpr(S
.getCombinedInit());
5777 EmitBlock(LoopExit
.getBlock());
5778 // Tell the runtime we are done.
5779 RT
.emitForStaticFinish(*this, S
.getEndLoc(), S
.getDirectiveKind());
5781 // Emit the outer loop, which requests its work chunk [LB..UB] from
5782 // runtime and runs the inner loop to process it.
5783 const OMPLoopArguments LoopArguments
= {
5784 LB
.getAddress(*this), UB
.getAddress(*this), ST
.getAddress(*this),
5785 IL
.getAddress(*this), Chunk
};
5786 EmitOMPDistributeOuterLoop(ScheduleKind
, S
, LoopScope
, LoopArguments
,
5789 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
5790 EmitOMPSimdFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
5791 return CGF
.Builder
.CreateIsNotNull(
5792 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
5795 if (isOpenMPSimdDirective(S
.getDirectiveKind()) &&
5796 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
5797 !isOpenMPTeamsDirective(S
.getDirectiveKind())) {
5798 EmitOMPReductionClauseFinal(S
, OMPD_simd
);
5799 // Emit post-update of the reduction variables if IsLastIter != 0.
5800 emitPostUpdateForReductionClause(
5801 *this, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
5802 return CGF
.Builder
.CreateIsNotNull(
5803 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
5806 // Emit final copy of the lastprivate variables if IsLastIter != 0.
5807 if (HasLastprivateClause
) {
5808 EmitOMPLastprivateClauseFinal(
5809 S
, /*NoFinals=*/false,
5810 Builder
.CreateIsNotNull(EmitLoadOfScalar(IL
, S
.getBeginLoc())));
5814 // We're now done with the loop, so jump to the continuation block.
5816 EmitBranch(ContBlock
);
5817 EmitBlock(ContBlock
, true);
/// Emit code for a standalone '#pragma omp distribute' directive.
///
/// The loop is produced by EmitOMPDistributeLoop, using
/// emitOMPLoopBodyWithStopPoint as the loop-body generator and the directive's
/// own increment expression (S.getInc()). The region is handed to the OpenMP
/// runtime's emitInlinedDirective with the OMPD_distribute kind.
///
/// \param S The distribute directive to emit.
5822 void CodeGenFunction::EmitOMPDistributeDirective(
5823 const OMPDistributeDirective
&S
) {
// Region body generator: all loop emission is delegated to
// EmitOMPDistributeLoop.
5824 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
5825 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
// Open the lexical scope for the directive before emitting the region.
5827 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5828 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute
, CodeGen
);
/// Generate a separate LLVM function for the captured statement of an
/// 'ordered' region.
///
/// A fresh CodeGenFunction (created with suppressNewContext=true) is given a
/// local CGCapturedStmtInfo and used to call
/// GenerateOpenMPCapturedStmtFunction. The resulting function is marked as
/// non-recursing.
///
/// \param CGM Module the function is generated into.
/// \param S   Captured statement to outline.
/// \param Loc Source location passed to the outlining routine.
/// \returns The generated function. NOTE(review): the return statement is not
/// in the visible part of this block; presumably \c Fn is returned - confirm.
5831 static llvm::Function
*emitOutlinedOrderedFunction(CodeGenModule
&CGM
,
5832 const CapturedStmt
*S
,
5833 SourceLocation Loc
) {
5834 CodeGenFunction
CGF(CGM
, /*suppressNewContext=*/true);
5835 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo
;
5836 CGF
.CapturedStmtInfo
= &CapStmtInfo
;
5837 llvm::Function
*Fn
= CGF
.GenerateOpenMPCapturedStmtFunction(*S
, Loc
);
5838 Fn
->setDoesNotRecurse();
/// Emit code for '#pragma omp ordered'.
///
/// Two lowerings are visible in this function:
///  - When the OpenMPIRBuilder language option is set, 'depend' clauses are
///    lowered with OpenMPIRBuilder::createOrderedDepend (loop counters are
///    converted to a signed 64-bit integer type first) and all other forms
///    with OpenMPIRBuilder::createOrderedThreadsSimd.
///  - Through CGOpenMPRuntime: 'depend' clauses use emitDoacrossOrdered and
///    all other forms use emitOrderedRegion.
/// In both lowerings the presence of a 'simd' clause is looked up and its
/// negation ('!C') is passed on to the region-emitting call.
///
/// \param S The ordered directive to emit.
5842 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective
&S
) {
// Lowering through OpenMPIRBuilder.
5843 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
5844 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
5845 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
5847 if (S
.hasClausesOfKind
<OMPDependClause
>()) {
5848 // The ordered directive with depend clause.
5849 assert(!S
.hasAssociatedStmt() &&
5850 "No associated statement must be in ordered depend construct.");
5851 InsertPointTy
AllocaIP(AllocaInsertPt
->getParent(),
5852 AllocaInsertPt
->getIterator());
// One createOrderedDepend call is emitted per depend clause.
5853 for (const auto *DC
: S
.getClausesOfKind
<OMPDependClause
>()) {
5854 unsigned NumLoops
= DC
->getNumLoops();
5855 QualType Int64Ty
= CGM
.getContext().getIntTypeForBitwidth(
5856 /*DestWidth=*/64, /*Signed=*/1);
// Collect every loop counter of the clause, converted to Int64Ty.
5857 llvm::SmallVector
<llvm::Value
*> StoreValues
;
5858 for (unsigned I
= 0; I
< NumLoops
; I
++) {
5859 const Expr
*CounterVal
= DC
->getLoopData(I
);
5861 llvm::Value
*StoreValue
= EmitScalarConversion(
5862 EmitScalarExpr(CounterVal
), CounterVal
->getType(), Int64Ty
,
5863 CounterVal
->getExprLoc());
5864 StoreValues
.emplace_back(StoreValue
);
// Distinguish depend(source) from the other dependency kinds.
5866 bool IsDependSource
= false;
5867 if (DC
->getDependencyKind() == OMPC_DEPEND_source
)
5868 IsDependSource
= true;
5869 Builder
.restoreIP(OMPBuilder
.createOrderedDepend(
5870 Builder
, AllocaIP
, NumLoops
, StoreValues
, ".cnt.addr",
5874 // The ordered directive with threads or simd clause, or without clause.
5875 // Without clause, it behaves as if the threads clause is specified.
5876 const auto *C
= S
.getSingleClause
<OMPSIMDClause
>();
// Finalization callback: forwards to the shared region-finalization helper.
5878 auto FiniCB
= [this](InsertPointTy IP
) {
5879 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
// Body callback. Both an outlined-call emission and an inlined-body
// emission are visible below. NOTE(review): presumably the choice between
// them depends on whether the 'simd' clause C is present - confirm.
5882 auto BodyGenCB
= [&S
, C
, this](InsertPointTy AllocaIP
,
5883 InsertPointTy CodeGenIP
) {
5884 Builder
.restoreIP(CodeGenIP
);
5886 const CapturedStmt
*CS
= S
.getInnermostCapturedStmt();
5888 llvm::BasicBlock
*FiniBB
= splitBBWithSuffix(
5889 Builder
, /*CreateBranch=*/false, ".ordered.after");
5890 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
5891 GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
5892 llvm::Function
*OutlinedFn
=
5893 emitOutlinedOrderedFunction(CGM
, CS
, S
.getBeginLoc());
5894 assert(S
.getBeginLoc().isValid() &&
5895 "Outlined function call location must be valid.");
5896 ApplyDebugLocation::CreateDefaultArtificial(*this, S
.getBeginLoc());
5897 OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP
, *FiniBB
,
5898 OutlinedFn
, CapturedVars
);
5900 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5901 *this, CS
->getCapturedStmt(), AllocaIP
, CodeGenIP
, "ordered");
5905 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5907 OMPBuilder
.createOrderedThreadsSimd(Builder
, BodyGenCB
, FiniCB
, !C
));
// Lowering through CGOpenMPRuntime (presumably the path taken when the
// OpenMPIRBuilder option is off - confirm).
5912 if (S
.hasClausesOfKind
<OMPDependClause
>()) {
5913 assert(!S
.hasAssociatedStmt() &&
5914 "No associated statement must be in ordered depend construct.");
5915 for (const auto *DC
: S
.getClausesOfKind
<OMPDependClause
>())
5916 CGM
.getOpenMPRuntime().emitDoacrossOrdered(*this, DC
);
5919 const auto *C
= S
.getSingleClause
<OMPSIMDClause
>();
// Region body generator: an outlined-function call and a direct EmitStmt of
// the captured statement are both visible below.
5920 auto &&CodeGen
= [&S
, C
, this](CodeGenFunction
&CGF
,
5921 PrePostActionTy
&Action
) {
5922 const CapturedStmt
*CS
= S
.getInnermostCapturedStmt();
5924 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
5925 CGF
.GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
5926 llvm::Function
*OutlinedFn
=
5927 emitOutlinedOrderedFunction(CGM
, CS
, S
.getBeginLoc());
5928 CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(CGF
, S
.getBeginLoc(),
5929 OutlinedFn
, CapturedVars
);
5932 CGF
.EmitStmt(CS
->getCapturedStmt());
5935 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5936 CGM
.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen
, S
.getBeginLoc(), !C
);
/// Convert \p Val from \p SrcType to the scalar type \p DestType.
///
/// \p Val must be a scalar or a complex value (never an aggregate) and
/// \p DestType must have scalar evaluation kind; both are asserted. Scalar
/// inputs go through EmitScalarConversion, complex inputs through
/// EmitComplexToScalarConversion.
///
/// \returns The converted scalar llvm::Value.
5939 static llvm::Value
*convertToScalarValue(CodeGenFunction
&CGF
, RValue Val
,
5940 QualType SrcType
, QualType DestType
,
5941 SourceLocation Loc
) {
5942 assert(CGF
.hasScalarEvaluationKind(DestType
) &&
5943 "DestType must have scalar evaluation kind.");
5944 assert(!Val
.isAggregate() && "Must be a scalar or complex.");
5945 return Val
.isScalar() ? CGF
.EmitScalarConversion(Val
.getScalarVal(), SrcType
,
5947 : CGF
.EmitComplexToScalarConversion(
5948 Val
.getComplexVal(), SrcType
, DestType
, Loc
);
/// Convert \p Val from \p SrcType to the complex type \p DestType.
///
/// \p DestType must have complex evaluation kind (asserted).
///  - A scalar input is converted to the destination element type and becomes
///    the first component; the second component is a null (zero) value of the
///    same LLVM type.
///  - A complex input has each of its two components converted from the
///    source element type to the destination element type.
///
/// \returns The converted pair of components. NOTE(review): the return
/// statement is not in the visible part of this block; presumably
/// \c ComplexVal is returned - confirm.
5951 static CodeGenFunction::ComplexPairTy
5952 convertToComplexValue(CodeGenFunction
&CGF
, RValue Val
, QualType SrcType
,
5953 QualType DestType
, SourceLocation Loc
) {
5954 assert(CGF
.getEvaluationKind(DestType
) == TEK_Complex
&&
5955 "DestType must have complex evaluation kind.");
5956 CodeGenFunction::ComplexPairTy ComplexVal
;
5957 if (Val
.isScalar()) {
5958 // Convert the input element to the element type of the complex.
5959 QualType DestElementType
=
5960 DestType
->castAs
<ComplexType
>()->getElementType();
5961 llvm::Value
*ScalarVal
= CGF
.EmitScalarConversion(
5962 Val
.getScalarVal(), SrcType
, DestElementType
, Loc
);
// Pair the converted scalar with a zero second component.
5963 ComplexVal
= CodeGenFunction::ComplexPairTy(
5964 ScalarVal
, llvm::Constant::getNullValue(ScalarVal
->getType()));
// Complex input: convert both components element-wise.
5966 assert(Val
.isComplex() && "Must be a scalar or complex.");
5967 QualType SrcElementType
= SrcType
->castAs
<ComplexType
>()->getElementType();
5968 QualType DestElementType
=
5969 DestType
->castAs
<ComplexType
>()->getElementType();
5970 ComplexVal
.first
= CGF
.EmitScalarConversion(
5971 Val
.getComplexVal().first
, SrcElementType
, DestElementType
, Loc
);
5972 ComplexVal
.second
= CGF
.EmitScalarConversion(
5973 Val
.getComplexVal().second
, SrcElementType
, DestElementType
, Loc
);
/// Store \p RVal into \p LVal for an OpenMP atomic construct.
///
/// Global-register lvalues are written with EmitStoreThroughGlobalRegLValue.
/// The atomic store is emitted with EmitAtomicStore using ordering \p AO, the
/// volatility of \p LVal, and isInit=false. NOTE(review): presumably the
/// atomic store is the alternative to the global-register case - confirm.
5978 static void emitSimpleAtomicStore(CodeGenFunction
&CGF
, llvm::AtomicOrdering AO
,
5979 LValue LVal
, RValue RVal
) {
5980 if (LVal
.isGlobalReg())
5981 CGF
.EmitStoreThroughGlobalRegLValue(RVal
, LVal
);
5983 CGF
.EmitAtomicStore(RVal
, LVal
, AO
, LVal
.isVolatile(), /*isInit=*/false);
/// Load the value of \p LVal for an OpenMP atomic construct.
///
/// Global-register lvalues are read with a plain EmitLoadOfLValue. All other
/// lvalues are read with EmitAtomicLoad; the ordering passed to it is derived
/// from \p AO via AtomicCmpXchgInst::getStrongestFailureOrdering.
///
/// \returns The loaded value.
5986 static RValue
emitSimpleAtomicLoad(CodeGenFunction
&CGF
,
5987 llvm::AtomicOrdering AO
, LValue LVal
,
5988 SourceLocation Loc
) {
5989 if (LVal
.isGlobalReg())
5990 return CGF
.EmitLoadOfLValue(LVal
, Loc
);
5991 return CGF
.EmitAtomicLoad(
5992 LVal
, Loc
, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO
),
/// Store \p RVal (of type \p RValTy) into \p LVal, converting it to the type
/// of \p LVal first.
///
/// The conversion is chosen by the evaluation kind of the destination type:
/// convertToScalarValue followed by EmitStoreThroughLValue is used for one
/// case and convertToComplexValue for another; any other kind hits
/// llvm_unreachable. NOTE(review): the case labels are not in the visible
/// part of this block; presumably these are the scalar, complex and aggregate
/// kinds respectively - confirm.
5996 void CodeGenFunction::emitOMPSimpleStore(LValue LVal
, RValue RVal
,
5997 QualType RValTy
, SourceLocation Loc
) {
5998 switch (getEvaluationKind(LVal
.getType())) {
6000 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
6001 *this, RVal
, RValTy
, LVal
.getType(), Loc
)),
6006 convertToComplexValue(*this, RVal
, RValTy
, LVal
.getType(), Loc
), LVal
,
6010 llvm_unreachable("Must be a scalar or complex.");
/// Emit code for '#pragma omp atomic read' (v = x).
///
/// Steps, in emission order:
///  1. Both lvalues are emitted and \p X is loaded with emitSimpleAtomicLoad.
///  2. For Acquire, AcquireRelease and SequentiallyConsistent orderings an
///     acquire flush is emitted; NotAtomic and Unordered are unreachable.
///  3. The loaded value is stored to \p V with emitOMPSimpleStore, converting
///     from the non-reference type of \p X.
///  4. checkAndEmitLastprivateConditional is invoked for \p V.
///
/// \param AO  Memory ordering of the atomic operation.
/// \param X   Expression that is read atomically (must be an lvalue).
/// \param V   Expression that receives the value (must be an lvalue).
/// \param Loc Source location used for the emitted operations.
6014 static void emitOMPAtomicReadExpr(CodeGenFunction
&CGF
, llvm::AtomicOrdering AO
,
6015 const Expr
*X
, const Expr
*V
,
6016 SourceLocation Loc
) {
6018 assert(V
->isLValue() && "V of 'omp atomic read' is not lvalue");
6019 assert(X
->isLValue() && "X of 'omp atomic read' is not lvalue");
6020 LValue XLValue
= CGF
.EmitLValue(X
);
6021 LValue VLValue
= CGF
.EmitLValue(V
);
6022 RValue Res
= emitSimpleAtomicLoad(CGF
, AO
, XLValue
, Loc
);
6023 // OpenMP, 2.17.7, atomic Construct
6024 // If the read or capture clause is specified and the acquire, acq_rel, or
6025 // seq_cst clause is specified then the strong flush on exit from the atomic
6026 // operation is also an acquire flush.
6028 case llvm::AtomicOrdering::Acquire
:
6029 case llvm::AtomicOrdering::AcquireRelease
:
6030 case llvm::AtomicOrdering::SequentiallyConsistent
:
6031 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6032 llvm::AtomicOrdering::Acquire
);
// No flush is visible for the monotonic and release orderings.
6034 case llvm::AtomicOrdering::Monotonic
:
6035 case llvm::AtomicOrdering::Release
:
6037 case llvm::AtomicOrdering::NotAtomic
:
6038 case llvm::AtomicOrdering::Unordered
:
6039 llvm_unreachable("Unexpected ordering.");
// Publish the loaded value to 'v', converting from the type of 'x'.
6041 CGF
.emitOMPSimpleStore(VLValue
, Res
, X
->getType().getNonReferenceType(), Loc
);
6042 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, V
);
/// Emit code for '#pragma omp atomic write' (x = expr).
///
/// The value of \p E is stored to \p X with emitSimpleAtomicStore, then
/// checkAndEmitLastprivateConditional is invoked for \p X. For Release,
/// AcquireRelease and SequentiallyConsistent orderings a release flush is
/// emitted afterwards; NotAtomic and Unordered are unreachable.
///
/// \param AO  Memory ordering of the atomic operation.
/// \param X   Expression that is written atomically (must be an lvalue).
/// \param E   Expression providing the value to store.
/// \param Loc Source location used for the emitted flush.
6045 static void emitOMPAtomicWriteExpr(CodeGenFunction
&CGF
,
6046 llvm::AtomicOrdering AO
, const Expr
*X
,
6047 const Expr
*E
, SourceLocation Loc
) {
6049 assert(X
->isLValue() && "X of 'omp atomic write' is not lvalue");
6050 emitSimpleAtomicStore(CGF
, AO
, CGF
.EmitLValue(X
), CGF
.EmitAnyExpr(E
));
6051 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6052 // OpenMP, 2.17.7, atomic Construct
6053 // If the write, update, or capture clause is specified and the release,
6054 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6055 // the atomic operation is also a release flush.
6057 case llvm::AtomicOrdering::Release
:
6058 case llvm::AtomicOrdering::AcquireRelease
:
6059 case llvm::AtomicOrdering::SequentiallyConsistent
:
6060 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6061 llvm::AtomicOrdering::Release
);
// No flush is visible for the acquire and monotonic orderings.
6063 case llvm::AtomicOrdering::Acquire
:
6064 case llvm::AtomicOrdering::Monotonic
:
6066 case llvm::AtomicOrdering::NotAtomic
:
6067 case llvm::AtomicOrdering::Unordered
:
6068 llvm_unreachable("Unexpected ordering.");
/// Try to lower an atomic update of \p X as a single 'atomicrmw' instruction.
///
/// The attempt is abandoned, returning {false, RValue::get(nullptr)}, when:
///  - the operator is BO_Comma, the update value is not scalar, or \p X is
///    not a simple lvalue;
///  - the update value is not a ConstantInt and its LLVM type differs from
///    the element type of X's address;
///  - the target has no builtin atomic for X's size and alignment;
///  - CheckAtomicSupport rejects the type of the update value or of X;
///  - the operation is a subtraction and X is not the left operand
///    (IsXLHSInRHSPart is false);
///  - the operator has no atomicrmw counterpart.
/// Otherwise the operator is mapped to an AtomicRMWInst::BinOp, a ConstantInt
/// update value is cast to the element type of X, and the instruction is
/// created with ordering \p AO.
///
/// \returns {true, result of the atomicrmw} on success.
6072 static std::pair
<bool, RValue
> emitOMPAtomicRMW(CodeGenFunction
&CGF
, LValue X
,
6074 BinaryOperatorKind BO
,
6075 llvm::AtomicOrdering AO
,
6076 bool IsXLHSInRHSPart
) {
6077 ASTContext
&Context
= CGF
.getContext();
6078 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6079 // expression is simple and atomic is allowed for the given type for the
6081 if (BO
== BO_Comma
|| !Update
.isScalar() || !X
.isSimple() ||
6082 (!isa
<llvm::ConstantInt
>(Update
.getScalarVal()) &&
6083 (Update
.getScalarVal()->getType() !=
6084 X
.getAddress(CGF
).getElementType())) ||
6085 !Context
.getTargetInfo().hasBuiltinAtomic(
6086 Context
.getTypeSize(X
.getType()), Context
.toBits(X
.getAlignment())))
6087 return std::make_pair(false, RValue::get(nullptr));
// Per-type support check. Floating-point types qualify only for add/sub and
// only when their store size is a power of two.
6089 auto &&CheckAtomicSupport
= [&CGF
](llvm::Type
*T
, BinaryOperatorKind BO
) {
6090 if (T
->isIntegerTy())
6093 if (T
->isFloatingPointTy() && (BO
== BO_Add
|| BO
== BO_Sub
))
6094 return llvm::isPowerOf2_64(CGF
.CGM
.getDataLayout().getTypeStoreSize(T
));
// Both the update value and the storage of 'x' must pass the check.
6099 if (!CheckAtomicSupport(Update
.getScalarVal()->getType(), BO
) ||
6100 !CheckAtomicSupport(X
.getAddress(CGF
).getElementType(), BO
))
6101 return std::make_pair(false, RValue::get(nullptr));
// Select the integer or floating-point flavour of the operation from the
// element type of 'x'.
6103 bool IsInteger
= X
.getAddress(CGF
).getElementType()->isIntegerTy();
6104 llvm::AtomicRMWInst::BinOp RMWOp
;
6107 RMWOp
= IsInteger
? llvm::AtomicRMWInst::Add
: llvm::AtomicRMWInst::FAdd
;
// Subtraction is not commutative: only 'x - expr' can use atomicrmw.
6110 if (!IsXLHSInRHSPart
)
6111 return std::make_pair(false, RValue::get(nullptr));
6112 RMWOp
= IsInteger
? llvm::AtomicRMWInst::Sub
: llvm::AtomicRMWInst::FSub
;
6115 RMWOp
= llvm::AtomicRMWInst::And
;
6118 RMWOp
= llvm::AtomicRMWInst::Or
;
6121 RMWOp
= llvm::AtomicRMWInst::Xor
;
// Min/max selection: the signedness of 'x' picks signed vs. unsigned
// variants, and the operand position of 'x' picks min vs. max.
6125 RMWOp
= X
.getType()->hasSignedIntegerRepresentation()
6126 ? (IsXLHSInRHSPart
? llvm::AtomicRMWInst::Min
6127 : llvm::AtomicRMWInst::Max
)
6128 : (IsXLHSInRHSPart
? llvm::AtomicRMWInst::UMin
6129 : llvm::AtomicRMWInst::UMax
);
6131 RMWOp
= IsXLHSInRHSPart
? llvm::AtomicRMWInst::FMin
6132 : llvm::AtomicRMWInst::FMax
;
// Mirror image of the selection above, with min and max swapped.
6136 RMWOp
= X
.getType()->hasSignedIntegerRepresentation()
6137 ? (IsXLHSInRHSPart
? llvm::AtomicRMWInst::Max
6138 : llvm::AtomicRMWInst::Min
)
6139 : (IsXLHSInRHSPart
? llvm::AtomicRMWInst::UMax
6140 : llvm::AtomicRMWInst::UMin
);
6142 RMWOp
= IsXLHSInRHSPart
? llvm::AtomicRMWInst::FMax
6143 : llvm::AtomicRMWInst::FMin
;
6146 RMWOp
= llvm::AtomicRMWInst::Xchg
;
6155 return std::make_pair(false, RValue::get(nullptr));
6174 llvm_unreachable("Unsupported atomic update operation");
// A constant-integer update value is cast to the element type of 'x': an
// integer cast honouring the signedness of 'x', and a UIToFP cast are both
// visible below. NOTE(review): presumably selected by IsInteger - confirm.
6176 llvm::Value
*UpdateVal
= Update
.getScalarVal();
6177 if (auto *IC
= dyn_cast
<llvm::ConstantInt
>(UpdateVal
)) {
6179 UpdateVal
= CGF
.Builder
.CreateIntCast(
6180 IC
, X
.getAddress(CGF
).getElementType(),
6181 X
.getType()->hasSignedIntegerRepresentation());
6183 UpdateVal
= CGF
.Builder
.CreateCast(llvm::Instruction::CastOps::UIToFP
, IC
,
6184 X
.getAddress(CGF
).getElementType());
6187 CGF
.Builder
.CreateAtomicRMW(RMWOp
, X
.getPointer(CGF
), UpdateVal
, AO
);
6188 return std::make_pair(true, RValue::get(Res
));
/// Emit an atomic update of \p X with the value \p E.
///
/// emitOMPAtomicRMW is tried first. Two further emission strategies are
/// visible: for global-register lvalues, the value is loaded, transformed by
/// \p CommonGen and stored back; otherwise EmitAtomicUpdate is used with
/// \p CommonGen computing the new value from the old one (compare-and-swap).
/// NOTE(review): presumably these are fallbacks taken only when no atomicrmw
/// was emitted - confirm.
///
/// \param X               Lvalue being updated.
/// \param E               Value combined with 'x'.
/// \param BO              Binary operator of the update.
/// \param IsXLHSInRHSPart True when 'x' is the left operand of \p BO.
/// \param AO              Memory ordering of the atomic operation.
/// \param Loc             Source location used for the emitted load.
/// \param CommonGen       Callback computing the new value from the old one.
/// \returns The pair produced by emitOMPAtomicRMW. NOTE(review): the return
/// statement is not in the visible part of this block - confirm.
6191 std::pair
<bool, RValue
> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6192 LValue X
, RValue E
, BinaryOperatorKind BO
, bool IsXLHSInRHSPart
,
6193 llvm::AtomicOrdering AO
, SourceLocation Loc
,
6194 const llvm::function_ref
<RValue(RValue
)> CommonGen
) {
6195 // Update expressions are allowed to have the following forms:
6196 // x binop= expr; -> xrval + expr;
6197 // x++, ++x -> xrval + 1;
6198 // x--, --x -> xrval - 1;
6199 // x = x binop expr; -> xrval binop expr
6200 // x = expr Op x; -> expr binop xrval;
6201 auto Res
= emitOMPAtomicRMW(*this, X
, E
, BO
, AO
, IsXLHSInRHSPart
);
6203 if (X
.isGlobalReg()) {
6204 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6206 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X
, Loc
)), X
);
6208 // Perform compare-and-swap procedure.
6209 EmitAtomicUpdate(X
, AO
, CommonGen
, X
.getType().isVolatileQualified());
/// Emit code for '#pragma omp atomic update'.
///
/// \p UE must be a BinaryOperator (ignoring implicit casts) whose two operands
/// are OpaqueValueExprs standing for the value of 'x' and the value of 'expr'.
/// The Gen callback binds those opaque values to the current value of 'x' and
/// to the evaluated \p E, then evaluates \p UE to obtain the new value. The
/// update is emitted via EmitOMPAtomicSimpleUpdateExpr, followed by
/// checkAndEmitLastprivateConditional for \p X and, for Release,
/// AcquireRelease and SequentiallyConsistent orderings, a release flush.
///
/// \param AO              Memory ordering of the atomic operation.
/// \param X               Expression being updated (must be an lvalue).
/// \param E               The 'expr' operand of the update.
/// \param UE              The update expression over opaque values.
/// \param IsXLHSInRHSPart True when 'x' is the left operand of the update.
/// \param Loc             Source location used for the emitted operations.
6215 static void emitOMPAtomicUpdateExpr(CodeGenFunction
&CGF
,
6216 llvm::AtomicOrdering AO
, const Expr
*X
,
6217 const Expr
*E
, const Expr
*UE
,
6218 bool IsXLHSInRHSPart
, SourceLocation Loc
) {
6219 assert(isa
<BinaryOperator
>(UE
->IgnoreImpCasts()) &&
6220 "Update expr in 'atomic update' must be a binary operator.");
6221 const auto *BOUE
= cast
<BinaryOperator
>(UE
->IgnoreImpCasts());
6222 // Update expressions are allowed to have the following forms:
6223 // x binop= expr; -> xrval + expr;
6224 // x++, ++x -> xrval + 1;
6225 // x--, --x -> xrval - 1;
6226 // x = x binop expr; -> xrval binop expr
6227 // x = expr Op x; -> expr binop xrval;
6228 assert(X
->isLValue() && "X of 'omp atomic update' is not lvalue");
6229 LValue XLValue
= CGF
.EmitLValue(X
);
6230 RValue ExprRValue
= CGF
.EmitAnyExpr(E
);
// Identify which operand of the update expression stands for 'x' and which
// for 'expr', based on the position of 'x'.
6231 const auto *LHS
= cast
<OpaqueValueExpr
>(BOUE
->getLHS()->IgnoreImpCasts());
6232 const auto *RHS
= cast
<OpaqueValueExpr
>(BOUE
->getRHS()->IgnoreImpCasts());
6233 const OpaqueValueExpr
*XRValExpr
= IsXLHSInRHSPart
? LHS
: RHS
;
6234 const OpaqueValueExpr
*ERValExpr
= IsXLHSInRHSPart
? RHS
: LHS
;
// Computes the new value of 'x' from its current value XRValue.
6235 auto &&Gen
= [&CGF
, UE
, ExprRValue
, XRValExpr
, ERValExpr
](RValue XRValue
) {
6236 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6237 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, XRValue
);
6238 return CGF
.EmitAnyExpr(UE
);
// The result pair is intentionally ignored: an update has no value to capture.
6240 (void)CGF
.EmitOMPAtomicSimpleUpdateExpr(
6241 XLValue
, ExprRValue
, BOUE
->getOpcode(), IsXLHSInRHSPart
, AO
, Loc
, Gen
);
6242 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6243 // OpenMP, 2.17.7, atomic Construct
6244 // If the write, update, or capture clause is specified and the release,
6245 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6246 // the atomic operation is also a release flush.
6248 case llvm::AtomicOrdering::Release
:
6249 case llvm::AtomicOrdering::AcquireRelease
:
6250 case llvm::AtomicOrdering::SequentiallyConsistent
:
6251 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6252 llvm::AtomicOrdering::Release
);
// No flush is visible for the acquire and monotonic orderings.
6254 case llvm::AtomicOrdering::Acquire
:
6255 case llvm::AtomicOrdering::Monotonic
:
6257 case llvm::AtomicOrdering::NotAtomic
:
6258 case llvm::AtomicOrdering::Unordered
:
6259 llvm_unreachable("Unexpected ordering.");
/// Convert \p Value from \p SourceType to \p ResType and return it as an
/// RValue.
///
/// The conversion is selected by the evaluation kind of \p ResType:
/// convertToScalarValue is used for one case and convertToComplexValue (whose
/// two components are repackaged with RValue::getComplex) for another; any
/// other kind hits llvm_unreachable. NOTE(review): the case labels are not in
/// the visible part of this block; presumably these are the scalar, complex
/// and aggregate kinds respectively - confirm.
6263 static RValue
convertToType(CodeGenFunction
&CGF
, RValue Value
,
6264 QualType SourceType
, QualType ResType
,
6265 SourceLocation Loc
) {
6266 switch (CGF
.getEvaluationKind(ResType
)) {
6269 convertToScalarValue(CGF
, Value
, SourceType
, ResType
, Loc
));
6271 auto Res
= convertToComplexValue(CGF
, Value
, SourceType
, ResType
, Loc
);
6272 return RValue::getComplex(Res
.first
, Res
.second
);
6277 llvm_unreachable("Must be a scalar or complex.");
/// Emit code for '#pragma omp atomic capture'.
///
/// Two forms are handled:
///  - 'x' is updated with an update expression \p UE: the update is emitted
///    via EmitOMPAtomicSimpleUpdateExpr and the captured value is either the
///    old value of 'x' (postfix form) or the newly computed one.
///  - 'x' is simply overwritten with \p E: \p E is converted to the
///    non-reference type of \p X and the update is emitted with BO_Assign; the
///    captured value is either the old value of 'x' (postfix form) or the
///    converted \p E.
/// The captured value is then stored to \p V with emitOMPSimpleStore and
/// checkAndEmitLastprivateConditional is invoked for \p X and \p V. For
/// OpenMP versions below 5.1 a flush matching the ordering \p AO is emitted.
///
/// \param AO              Memory ordering of the atomic operation.
/// \param IsPostfixUpdate True when 'v' receives the value of 'x' from before
///                        the update.
/// \param V               Expression receiving the captured value (lvalue).
/// \param X               Expression being updated (lvalue).
/// \param E               The 'expr' operand.
/// \param UE              The update expression. NOTE(review): presumably null
///                        for the plain-overwrite form - confirm.
/// \param IsXLHSInRHSPart True when 'x' is the left operand of the update.
/// \param Loc             Source location used for the emitted operations.
6280 static void emitOMPAtomicCaptureExpr(CodeGenFunction
&CGF
,
6281 llvm::AtomicOrdering AO
,
6282 bool IsPostfixUpdate
, const Expr
*V
,
6283 const Expr
*X
, const Expr
*E
,
6284 const Expr
*UE
, bool IsXLHSInRHSPart
,
6285 SourceLocation Loc
) {
6286 assert(X
->isLValue() && "X of 'omp atomic capture' is not lvalue");
6287 assert(V
->isLValue() && "V of 'omp atomic capture' is not lvalue");
6289 LValue VLValue
= CGF
.EmitLValue(V
);
6290 LValue XLValue
= CGF
.EmitLValue(X
);
6291 RValue ExprRValue
= CGF
.EmitAnyExpr(E
);
6292 QualType NewVValType
;
6294 // 'x' is updated with some additional value.
6295 assert(isa
<BinaryOperator
>(UE
->IgnoreImpCasts()) &&
6296 "Update expr in 'atomic capture' must be a binary operator.");
6297 const auto *BOUE
= cast
<BinaryOperator
>(UE
->IgnoreImpCasts());
6298 // Update expressions are allowed to have the following forms:
6299 // x binop= expr; -> xrval + expr;
6300 // x++, ++x -> xrval + 1;
6301 // x--, --x -> xrval - 1;
6302 // x = x binop expr; -> xrval binop expr
6303 // x = expr Op x; -> expr binop xrval;
6304 const auto *LHS
= cast
<OpaqueValueExpr
>(BOUE
->getLHS()->IgnoreImpCasts());
6305 const auto *RHS
= cast
<OpaqueValueExpr
>(BOUE
->getRHS()->IgnoreImpCasts());
6306 const OpaqueValueExpr
*XRValExpr
= IsXLHSInRHSPart
? LHS
: RHS
;
// The captured value has the type of the opaque value standing for 'x'.
6307 NewVValType
= XRValExpr
->getType();
6308 const OpaqueValueExpr
*ERValExpr
= IsXLHSInRHSPart
? RHS
: LHS
;
// Computes the new value of 'x' and records the value to capture as a side
// effect (old value for the postfix form, new value otherwise).
6309 auto &&Gen
= [&CGF
, &NewVVal
, UE
, ExprRValue
, XRValExpr
, ERValExpr
,
6310 IsPostfixUpdate
](RValue XRValue
) {
6311 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6312 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, XRValue
);
6313 RValue Res
= CGF
.EmitAnyExpr(UE
);
6314 NewVVal
= IsPostfixUpdate
? XRValue
: Res
;
6317 auto Res
= CGF
.EmitOMPAtomicSimpleUpdateExpr(
6318 XLValue
, ExprRValue
, BOUE
->getOpcode(), IsXLHSInRHSPart
, AO
, Loc
, Gen
);
6319 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6321 // 'atomicrmw' instruction was generated.
6322 if (IsPostfixUpdate
) {
6323 // Use old value from 'atomicrmw'.
6324 NewVVal
= Res
.second
;
6326 // 'atomicrmw' does not provide new value, so evaluate it using old
6328 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6329 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, Res
.second
);
6330 NewVVal
= CGF
.EmitAnyExpr(UE
);
6334 // 'x' is simply rewritten with some 'expr'.
6335 NewVValType
= X
->getType().getNonReferenceType();
6336 ExprRValue
= convertToType(CGF
, ExprRValue
, E
->getType(),
6337 X
->getType().getNonReferenceType(), Loc
);
6338 auto &&Gen
= [&NewVVal
, ExprRValue
](RValue XRValue
) {
6342 // Try to perform atomicrmw xchg, otherwise simple exchange.
6343 auto Res
= CGF
.EmitOMPAtomicSimpleUpdateExpr(
6344 XLValue
, ExprRValue
, /*BO=*/BO_Assign
, /*IsXLHSInRHSPart=*/false, AO
,
6346 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6348 // 'atomicrmw' instruction was generated.
6349 NewVVal
= IsPostfixUpdate
? Res
.second
: ExprRValue
;
6352 // Emit post-update store to 'v' of old/new 'x' value.
6353 CGF
.emitOMPSimpleStore(VLValue
, NewVVal
, NewVValType
, Loc
);
6354 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, V
);
6355 // OpenMP 5.1 removes the required flush for capture clause.
6356 if (CGF
.CGM
.getLangOpts().OpenMP
< 51) {
6357 // OpenMP, 2.17.7, atomic Construct
6358 // If the write, update, or capture clause is specified and the release,
6359 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6360 // the atomic operation is also a release flush.
6361 // If the read or capture clause is specified and the acquire, acq_rel, or
6362 // seq_cst clause is specified then the strong flush on exit from the atomic
6363 // operation is also an acquire flush.
6365 case llvm::AtomicOrdering::Release
:
6366 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6367 llvm::AtomicOrdering::Release
);
6369 case llvm::AtomicOrdering::Acquire
:
6370 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6371 llvm::AtomicOrdering::Acquire
);
// acq_rel and seq_cst both get a combined acquire-release flush.
6373 case llvm::AtomicOrdering::AcquireRelease
:
6374 case llvm::AtomicOrdering::SequentiallyConsistent
:
6375 CGF
.CGM
.getOpenMPRuntime().emitFlush(
6376 CGF
, std::nullopt
, Loc
, llvm::AtomicOrdering::AcquireRelease
);
6378 case llvm::AtomicOrdering::Monotonic
:
6380 case llvm::AtomicOrdering::NotAtomic
:
6381 case llvm::AtomicOrdering::Unordered
:
6382 llvm_unreachable("Unexpected ordering.");
/// Emit code for '#pragma omp atomic compare' (optionally with capture) via
/// OpenMPIRBuilder::createAtomicCompare.
///
/// The opcode of the condition \p CE (which must be a BinaryOperator) selects
/// the compare operation (EQ, MIN or MAX); any other opcode is unreachable.
/// \p E and, when present, \p D are evaluated as scalars and converted to the
/// type of \p X if their types differ; ConstantInt results are additionally
/// cast to the element type of X's address. \p X, \p V and \p R are described
/// to the builder as AtomicOpValue records (pointer, element type, signedness,
/// volatility).
///
/// \param AO              Memory ordering of the atomic operation.
/// \param X               Expression being compared and updated.
/// \param V               Capture target. NOTE(review): presumably may be
///                        null when nothing is captured - confirm.
/// \param R               Result target. NOTE(review): presumably may be null
///                        - confirm.
/// \param E               The 'expr' operand.
/// \param D               The 'desired' operand; may be null (checked below).
/// \param CE              The condition expression.
/// \param IsXBinopExpr    Forwarded unchanged to createAtomicCompare.
/// \param IsPostfixUpdate Forwarded unchanged to createAtomicCompare.
/// \param IsFailOnly      Forwarded unchanged to createAtomicCompare.
/// \param Loc             Source location used for emitted conversions.
6387 static void emitOMPAtomicCompareExpr(CodeGenFunction
&CGF
,
6388 llvm::AtomicOrdering AO
, const Expr
*X
,
6389 const Expr
*V
, const Expr
*R
,
6390 const Expr
*E
, const Expr
*D
,
6391 const Expr
*CE
, bool IsXBinopExpr
,
6392 bool IsPostfixUpdate
, bool IsFailOnly
,
6393 SourceLocation Loc
) {
6394 llvm::OpenMPIRBuilder
&OMPBuilder
=
6395 CGF
.CGM
.getOpenMPRuntime().getOMPBuilder();
// Map the condition's opcode to the builder's compare operation.
6397 OMPAtomicCompareOp Op
;
6398 assert(isa
<BinaryOperator
>(CE
) && "CE is not a BinaryOperator");
6399 switch (cast
<BinaryOperator
>(CE
)->getOpcode()) {
6401 Op
= OMPAtomicCompareOp::EQ
;
6404 Op
= OMPAtomicCompareOp::MIN
;
6407 Op
= OMPAtomicCompareOp::MAX
;
6410 llvm_unreachable("unsupported atomic compare binary operator");
6413 LValue XLVal
= CGF
.EmitLValue(X
);
6414 Address XAddr
= XLVal
.getAddress(CGF
);
// Evaluates E as a scalar of X's type. When the types differ, E is
// re-evaluated with its as-written implicit nodes stripped and converted to
// X's type only if still needed.
6416 auto EmitRValueWithCastIfNeeded
= [&CGF
, Loc
](const Expr
*X
, const Expr
*E
) {
6417 if (X
->getType() == E
->getType())
6418 return CGF
.EmitScalarExpr(E
);
6419 const Expr
*NewE
= E
->IgnoreImplicitAsWritten();
6420 llvm::Value
*V
= CGF
.EmitScalarExpr(NewE
);
6421 if (NewE
->getType() == X
->getType())
6423 return CGF
.EmitScalarConversion(V
, NewE
->getType(), X
->getType(), Loc
);
6426 llvm::Value
*EVal
= EmitRValueWithCastIfNeeded(X
, E
);
6427 llvm::Value
*DVal
= D
? EmitRValueWithCastIfNeeded(X
, D
) : nullptr;
// Constant integers are cast to the storage type of 'x', using the
// signedness of the originating expression.
6428 if (auto *CI
= dyn_cast
<llvm::ConstantInt
>(EVal
))
6429 EVal
= CGF
.Builder
.CreateIntCast(
6430 CI
, XLVal
.getAddress(CGF
).getElementType(),
6431 E
->getType()->hasSignedIntegerRepresentation());
6433 if (auto *CI
= dyn_cast
<llvm::ConstantInt
>(DVal
))
6434 DVal
= CGF
.Builder
.CreateIntCast(
6435 CI
, XLVal
.getAddress(CGF
).getElementType(),
6436 D
->getType()->hasSignedIntegerRepresentation());
// Describe 'x', 'v' and 'r' to the builder.
6438 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal
{
6439 XAddr
.getPointer(), XAddr
.getElementType(),
6440 X
->getType()->hasSignedIntegerRepresentation(),
6441 X
->getType().isVolatileQualified()};
6442 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal
, ROpVal
;
6444 LValue LV
= CGF
.EmitLValue(V
);
6445 Address Addr
= LV
.getAddress(CGF
);
6446 VOpVal
= {Addr
.getPointer(), Addr
.getElementType(),
6447 V
->getType()->hasSignedIntegerRepresentation(),
6448 V
->getType().isVolatileQualified()};
6451 LValue LV
= CGF
.EmitLValue(R
);
6452 Address Addr
= LV
.getAddress(CGF
);
6453 ROpVal
= {Addr
.getPointer(), Addr
.getElementType(),
6454 R
->getType()->hasSignedIntegerRepresentation(),
6455 R
->getType().isVolatileQualified()};
// Emit the operation and continue code generation at the insertion point
// returned by the builder.
6458 CGF
.Builder
.restoreIP(OMPBuilder
.createAtomicCompare(
6459 CGF
.Builder
, XOpVal
, VOpVal
, ROpVal
, EVal
, DVal
, AO
, Op
, IsXBinopExpr
,
6460 IsPostfixUpdate
, IsFailOnly
));
/// Dispatch emission of an 'omp atomic' construct to the helper matching its
/// clause kind.
///
/// The helpers called are emitOMPAtomicReadExpr, emitOMPAtomicWriteExpr,
/// emitOMPAtomicUpdateExpr, emitOMPAtomicCaptureExpr and, for OMPC_compare,
/// emitOMPAtomicCompareExpr (which receives \p IsXLHSInRHSPart as its
/// IsXBinopExpr argument). A clause kind that is not allowed on 'omp atomic'
/// hits llvm_unreachable. NOTE(review): only the OMPC_compare case label is in
/// the visible part of this block; the other labels are presumably the
/// read/write/update/capture clause kinds - confirm.
6463 static void emitOMPAtomicExpr(CodeGenFunction
&CGF
, OpenMPClauseKind Kind
,
6464 llvm::AtomicOrdering AO
, bool IsPostfixUpdate
,
6465 const Expr
*X
, const Expr
*V
, const Expr
*R
,
6466 const Expr
*E
, const Expr
*UE
, const Expr
*D
,
6467 const Expr
*CE
, bool IsXLHSInRHSPart
,
6468 bool IsFailOnly
, SourceLocation Loc
) {
6471 emitOMPAtomicReadExpr(CGF
, AO
, X
, V
, Loc
);
6474 emitOMPAtomicWriteExpr(CGF
, AO
, X
, E
, Loc
);
6478 emitOMPAtomicUpdateExpr(CGF
, AO
, X
, E
, UE
, IsXLHSInRHSPart
, Loc
);
6481 emitOMPAtomicCaptureExpr(CGF
, AO
, IsPostfixUpdate
, V
, X
, E
, UE
,
6482 IsXLHSInRHSPart
, Loc
);
6484 case OMPC_compare
: {
6485 emitOMPAtomicCompareExpr(CGF
, AO
, X
, V
, R
, E
, D
, CE
, IsXLHSInRHSPart
,
6486 IsPostfixUpdate
, IsFailOnly
, Loc
);
6490 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
/// Emit code for '#pragma omp atomic'.
///
/// Steps:
///  1. Determine the memory ordering from an explicit seq_cst / acq_rel /
///     acquire / release / relaxed clause (checked in that order); the
///     default is Monotonic.
///  2. Scan the clauses, skipping memory-order and hint clauses, and record
///     the kinds encountered. When both 'compare' and 'capture' are present
///     the kind is set to OMPC_compare.
///  3. If no memory-order clause was given, consult the runtime's default
///     memory ordering; an AcquireRelease default is narrowed to Release for
///     update/write (or no clause) and to Acquire for read.
///  4. Open a lexical scope, emit a stop point for the associated statement
///     and dispatch to emitOMPAtomicExpr.
///
/// \param S The atomic directive to emit.
6494 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective
&S
) {
6495 llvm::AtomicOrdering AO
= llvm::AtomicOrdering::Monotonic
;
6496 bool MemOrderingSpecified
= false;
6497 if (S
.getSingleClause
<OMPSeqCstClause
>()) {
6498 AO
= llvm::AtomicOrdering::SequentiallyConsistent
;
6499 MemOrderingSpecified
= true;
6500 } else if (S
.getSingleClause
<OMPAcqRelClause
>()) {
6501 AO
= llvm::AtomicOrdering::AcquireRelease
;
6502 MemOrderingSpecified
= true;
6503 } else if (S
.getSingleClause
<OMPAcquireClause
>()) {
6504 AO
= llvm::AtomicOrdering::Acquire
;
6505 MemOrderingSpecified
= true;
6506 } else if (S
.getSingleClause
<OMPReleaseClause
>()) {
6507 AO
= llvm::AtomicOrdering::Release
;
6508 MemOrderingSpecified
= true;
6509 } else if (S
.getSingleClause
<OMPRelaxedClause
>()) {
6510 AO
= llvm::AtomicOrdering::Monotonic
;
6511 MemOrderingSpecified
= true;
// Collect the non-ordering clause kinds present on the directive.
6513 llvm::SmallSet
<OpenMPClauseKind
, 2> KindsEncountered
;
6514 OpenMPClauseKind Kind
= OMPC_unknown
;
6515 for (const OMPClause
*C
: S
.clauses()) {
6516 // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
6518 OpenMPClauseKind K
= C
->getClauseKind();
6519 if (K
== OMPC_seq_cst
|| K
== OMPC_acq_rel
|| K
== OMPC_acquire
||
6520 K
== OMPC_release
|| K
== OMPC_relaxed
|| K
== OMPC_hint
)
6523 KindsEncountered
.insert(K
);
6525 // We just need to correct Kind here. No need to set a bool saying it is
6526 // actually compare capture because we can tell from whether V and R are
6528 if (KindsEncountered
.contains(OMPC_compare
) &&
6529 KindsEncountered
.contains(OMPC_capture
))
6530 Kind
= OMPC_compare
;
// Without an explicit memory-order clause, derive the ordering from the
// runtime's default.
6531 if (!MemOrderingSpecified
) {
6532 llvm::AtomicOrdering DefaultOrder
=
6533 CGM
.getOpenMPRuntime().getDefaultMemoryOrdering();
6534 if (DefaultOrder
== llvm::AtomicOrdering::Monotonic
||
6535 DefaultOrder
== llvm::AtomicOrdering::SequentiallyConsistent
||
6536 (DefaultOrder
== llvm::AtomicOrdering::AcquireRelease
&&
6537 Kind
== OMPC_capture
)) {
// An acq_rel default is split by operation kind: release for operations
// that write 'x', acquire for reads.
6539 } else if (DefaultOrder
== llvm::AtomicOrdering::AcquireRelease
) {
6540 if (Kind
== OMPC_unknown
|| Kind
== OMPC_update
|| Kind
== OMPC_write
) {
6541 AO
= llvm::AtomicOrdering::Release
;
6542 } else if (Kind
== OMPC_read
) {
6543 assert(Kind
== OMPC_read
&& "Unexpected atomic kind.");
6544 AO
= llvm::AtomicOrdering::Acquire
;
6549 LexicalScope
Scope(*this, S
.getSourceRange());
6550 EmitStopPoint(S
.getAssociatedStmt());
6551 emitOMPAtomicExpr(*this, Kind
, AO
, S
.isPostfixUpdate(), S
.getX(), S
.getV(),
6552 S
.getR(), S
.getExpr(), S
.getUpdateExpr(), S
.getD(),
6553 S
.getCondExpr(), S
.isXLHSInRHSPart(), S
.isFailOnly(),
/// Common emission path for directives that satisfy
/// isOpenMPTargetExecutionDirective (asserted on entry).
///
/// \param CGF     Function being emitted into.
/// \param S       The target execution directive.
/// \param CodeGen Generator for the region body; forwarded to
///                emitTargetOutlinedFunction on the non-device path.
///
/// When LangOpts.OpenMPIsDevice is set, the innermost captured statement is
/// emitted inline as an OMPD_target region. The remaining code gathers the
/// applicable 'if' and 'device' clauses, decides whether the region is an
/// offload entry, outlines the region and emits the target call.
6557 static void emitCommonOMPTargetDirective(CodeGenFunction
&CGF
,
6558 const OMPExecutableDirective
&S
,
6559 const RegionCodeGenTy
&CodeGen
) {
6560 assert(isOpenMPTargetExecutionDirective(S
.getDirectiveKind()));
6561 CodeGenModule
&CGM
= CGF
.CGM
;
6563 // On device emit this construct as inlined code.
6564 if (CGM
.getLangOpts().OpenMPIsDevice
) {
6565 OMPLexicalScope
Scope(CGF
, S
, OMPD_target
);
6566 CGM
.getOpenMPRuntime().emitInlinedDirective(
6567 CGF
, OMPD_target
, [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6568 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
// NOTE(review): the statements that close the device branch are not part of
// this listing; presumably the branch returns before reaching the code below.
6573 auto LPCRegion
= CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF
, S
);
// Fn and FnID are handed to emitTargetOutlinedFunction and then passed on to
// emitTargetCall.
6574 llvm::Function
*Fn
= nullptr;
6575 llvm::Constant
*FnID
= nullptr;
6577 const Expr
*IfCond
= nullptr;
6578 // Check for the at most one if clause associated with the target region.
// Only an 'if' clause without a name modifier, or with the 'target' modifier,
// supplies the condition.
6579 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
6580 if (C
->getNameModifier() == OMPD_unknown
||
6581 C
->getNameModifier() == OMPD_target
) {
6582 IfCond
= C
->getCondition();
6587 // Check if we have any device clause associated with the directive.
6588 llvm::PointerIntPair
<const Expr
*, 2, OpenMPDeviceClauseModifier
> Device(
6589 nullptr, OMPC_DEVICE_unknown
);
6590 if (auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
6591 Device
.setPointerAndInt(C
->getDevice(), C
->getModifier());
6593 // Check if we have an if clause whose conditional always evaluates to false
6594 // or if we do not have any targets specified. If so the target region is not
6595 // an offload entry point.
6596 bool IsOffloadEntry
= true;
6599 if (CGF
.ConstantFoldsToSimpleInteger(IfCond
, Val
) && !Val
)
6600 IsOffloadEntry
= false;
6602 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
6603 IsOffloadEntry
= false;
// With the OpenMPOffloadMandatory language option set, not producing an
// offload entry is reported as an error diagnostic.
6605 if (CGM
.getLangOpts().OpenMPOffloadMandatory
&& !IsOffloadEntry
) {
6606 unsigned DiagID
= CGM
.getDiags().getCustomDiagID(
6607 DiagnosticsEngine::Error
,
6608 "No offloading entry generated while offloading is mandatory.");
6609 CGM
.getDiags().Report(DiagID
);
6612 assert(CGF
.CurFuncDecl
&& "No parent declaration for target region!");
6613 StringRef ParentName
;
6614 // In case we have Ctors/Dtors we use the complete type variant to produce
6615 // the mangling of the device outlined kernel.
6616 if (const auto *D
= dyn_cast
<CXXConstructorDecl
>(CGF
.CurFuncDecl
))
6617 ParentName
= CGM
.getMangledName(GlobalDecl(D
, Ctor_Complete
));
6618 else if (const auto *D
= dyn_cast
<CXXDestructorDecl
>(CGF
.CurFuncDecl
))
6619 ParentName
= CGM
.getMangledName(GlobalDecl(D
, Dtor_Complete
));
6622 CGM
.getMangledName(GlobalDecl(cast
<FunctionDecl
>(CGF
.CurFuncDecl
)));
6624 // Emit target region as a standalone region.
6625 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(S
, ParentName
, Fn
, FnID
,
6626 IsOffloadEntry
, CodeGen
);
6627 OMPLexicalScope
Scope(CGF
, S
, OMPD_task
);
// Callback that yields the loop iteration count converted to an unsigned
// 64-bit integer; the computation is only done when the region is an offload
// entry.
6628 auto &&SizeEmitter
=
6629 [IsOffloadEntry
](CodeGenFunction
&CGF
,
6630 const OMPLoopDirective
&D
) -> llvm::Value
* {
6631 if (IsOffloadEntry
) {
// NOTE(review): this creates an unnamed OMPLoopScope temporary that is
// destroyed at the end of the statement, so only its constructor/destructor
// side effects apply - confirm that this is intended.
6632 OMPLoopScope(CGF
, D
);
6633 // Emit calculation of the iterations count.
6634 llvm::Value
*NumIterations
= CGF
.EmitScalarExpr(D
.getNumIterations());
6635 NumIterations
= CGF
.Builder
.CreateIntCast(NumIterations
, CGF
.Int64Ty
,
6636 /*isSigned=*/false);
6637 return NumIterations
;
6641 CGM
.getOpenMPRuntime().emitTargetCall(CGF
, S
, Fn
, FnID
, IfCond
, Device
,
/// Emits the body of a 'target' region for \p S into \p CGF.
///
/// Firstprivate and private clause variables are set up in a private scope and
/// privatized. For target execution directives the runtime's
/// adjustTargetSpecificDataForLambdas hook is invoked. Finally the statement
/// captured for OMPD_target is emitted and an insert point is ensured.
6645 static void emitTargetRegion(CodeGenFunction
&CGF
, const OMPTargetDirective
&S
,
6646 PrePostActionTy
&Action
) {
6648 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6649 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
6650 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
6651 (void)PrivateScope
.Privatize();
6652 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
6653 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
6655 CGF
.EmitStmt(S
.getCapturedStmt(OMPD_target
)->getCapturedStmt());
6656 CGF
.EnsureInsertPoint();
/// Emits the outlined function for the 'target' directive \p S as an offload
/// entry (IsOffloadEntry is passed as true).
///
/// \param CGM        Module whose OpenMP runtime performs the outlining.
/// \param ParentName Forwarded unchanged to emitTargetOutlinedFunction.
/// \param S          The directive; its body is produced by emitTargetRegion.
///
/// Asserts that both the function and its address were produced.
6659 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule
&CGM
,
6660 StringRef ParentName
,
6661 const OMPTargetDirective
&S
) {
6662 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6663 emitTargetRegion(CGF
, S
, Action
);
6666 llvm::Constant
*Addr
;
6667 // Emit target region as a standalone region.
6668 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6669 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6670 assert(Fn
&& Addr
&& "Target device function emission failed.");
/// Emits a 'target' directive: wraps emitTargetRegion in a body generator and
/// hands it to emitCommonOMPTargetDirective.
6673 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective
&S
) {
6674 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6675 emitTargetRegion(CGF
, S
, Action
);
6677 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
/// Common emission path for directives that contain a 'teams' region.
///
/// \param CGF           Function being emitted into.
/// \param S             The directive; its OMPD_teams captured statement is
///                      outlined.
/// \param InnermostKind Forwarded to emitTeamsOutlinedFunction.
/// \param CodeGen       Body generator (its use falls on a line that is not
///                      part of this listing; presumably it is passed to the
///                      outliner - confirm).
///
/// The num_teams / thread_limit clause expressions (null when the clause is
/// absent) are forwarded to emitNumTeamsClause, the captured variables are
/// collected, and the teams call is emitted with the outlined function.
6680 static void emitCommonOMPTeamsDirective(CodeGenFunction
&CGF
,
6681 const OMPExecutableDirective
&S
,
6682 OpenMPDirectiveKind InnermostKind
,
6683 const RegionCodeGenTy
&CodeGen
) {
6684 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_teams
);
6685 llvm::Function
*OutlinedFn
=
6686 CGF
.CGM
.getOpenMPRuntime().emitTeamsOutlinedFunction(
6687 CGF
, S
, *CS
->getCapturedDecl()->param_begin(), InnermostKind
,
6690 const auto *NT
= S
.getSingleClause
<OMPNumTeamsClause
>();
6691 const auto *TL
= S
.getSingleClause
<OMPThreadLimitClause
>();
6693 const Expr
*NumTeams
= NT
? NT
->getNumTeams() : nullptr;
6694 const Expr
*ThreadLimit
= TL
? TL
->getThreadLimit() : nullptr;
6696 CGF
.CGM
.getOpenMPRuntime().emitNumTeamsClause(CGF
, NumTeams
, ThreadLimit
,
6700 OMPTeamsScope
Scope(CGF
, S
);
6701 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
6702 CGF
.GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
6703 CGF
.CGM
.getOpenMPRuntime().emitTeamsCall(CGF
, S
, S
.getBeginLoc(), OutlinedFn
,
/// Emits a 'teams' directive.
///
/// The body generator sets up firstprivate, private and reduction variables in
/// a private scope, emits the statement captured for OMPD_teams and finalizes
/// the reductions. The region goes through emitCommonOMPTeamsDirective with
/// OMPD_distribute as the innermost kind, followed by the reduction
/// post-update step (with a callback that returns nullptr).
6707 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective
&S
) {
6708 // Emit teams region as a standalone region.
6709 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6711 OMPPrivateScope
PrivateScope(CGF
);
6712 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
6713 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
6714 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6715 (void)PrivateScope
.Privatize();
6716 CGF
.EmitStmt(S
.getCapturedStmt(OMPD_teams
)->getCapturedStmt());
6717 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6719 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute
, CodeGen
);
6720 emitPostUpdateForReductionClause(*this, S
,
6721 [](CodeGenFunction
&) { return nullptr; });
/// Emits the body of a combined 'target teams' region for \p S.
///
/// The inner generator privatizes firstprivate, private and reduction
/// variables, calls the runtime's adjustTargetSpecificDataForLambdas hook for
/// target execution directives, emits the statement captured for OMPD_teams
/// and finalizes reductions. It is run through emitCommonOMPTeamsDirective
/// with OMPD_teams as the innermost kind, followed by the reduction
/// post-update step.
6724 static void emitTargetTeamsRegion(CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6725 const OMPTargetTeamsDirective
&S
) {
6726 auto *CS
= S
.getCapturedStmt(OMPD_teams
);
6728 // Emit teams region as a standalone region.
6729 auto &&CodeGen
= [&S
, CS
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6731 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6732 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
6733 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
6734 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6735 (void)PrivateScope
.Privatize();
6736 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
6737 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
6738 CGF
.EmitStmt(CS
->getCapturedStmt());
6739 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6741 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_teams
, CodeGen
);
6742 emitPostUpdateForReductionClause(CGF
, S
,
6743 [](CodeGenFunction
&) { return nullptr; });
/// Emits the outlined function for the 'target teams' directive \p S as an
/// offload entry (IsOffloadEntry is passed as true), using
/// emitTargetTeamsRegion as the body generator. \p ParentName is forwarded
/// unchanged to emitTargetOutlinedFunction. Asserts that both the function and
/// its address were produced.
6746 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6747 CodeGenModule
&CGM
, StringRef ParentName
,
6748 const OMPTargetTeamsDirective
&S
) {
6749 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6750 emitTargetTeamsRegion(CGF
, Action
, S
);
6753 llvm::Constant
*Addr
;
6754 // Emit target region as a standalone region.
6755 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6756 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6757 assert(Fn
&& Addr
&& "Target device function emission failed.");
/// Emits a 'target teams' directive: wraps emitTargetTeamsRegion in a body
/// generator and hands it to emitCommonOMPTargetDirective.
6760 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6761 const OMPTargetTeamsDirective
&S
) {
6762 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6763 emitTargetTeamsRegion(CGF
, Action
, S
);
6765 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
/// Emits the body of a combined 'target teams distribute' region for \p S.
///
/// CodeGenDistribute emits the distribute loop with
/// emitOMPLoopBodyWithStopPoint as the loop body emitter and S.getInc() as the
/// increment. The teams generator initializes reductions in a private scope,
/// emits an inlined OMPD_distribute directive and finalizes the reductions.
/// The result is run through emitCommonOMPTeamsDirective (innermost kind
/// OMPD_distribute), followed by the reduction post-update step.
6769 emitTargetTeamsDistributeRegion(CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6770 const OMPTargetTeamsDistributeDirective
&S
) {
6772 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6773 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6776 // Emit teams region as a standalone region.
6777 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6778 PrePostActionTy
&Action
) {
6780 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6781 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6782 (void)PrivateScope
.Privatize();
6783 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6785 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6787 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute
, CodeGen
);
6788 emitPostUpdateForReductionClause(CGF
, S
,
6789 [](CodeGenFunction
&) { return nullptr; });
/// Emits the outlined function for the 'target teams distribute' directive
/// \p S as an offload entry (IsOffloadEntry is passed as true), using
/// emitTargetTeamsDistributeRegion as the body generator. \p ParentName is
/// forwarded unchanged to emitTargetOutlinedFunction. Asserts that both the
/// function and its address were produced.
6792 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6793 CodeGenModule
&CGM
, StringRef ParentName
,
6794 const OMPTargetTeamsDistributeDirective
&S
) {
6795 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6796 emitTargetTeamsDistributeRegion(CGF
, Action
, S
);
6799 llvm::Constant
*Addr
;
6800 // Emit target region as a standalone region.
6801 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6802 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6803 assert(Fn
&& Addr
&& "Target device function emission failed.");
/// Emits a 'target teams distribute' directive: wraps
/// emitTargetTeamsDistributeRegion in a body generator and hands it to
/// emitCommonOMPTargetDirective.
6806 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6807 const OMPTargetTeamsDistributeDirective
&S
) {
6808 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6809 emitTargetTeamsDistributeRegion(CGF
, Action
, S
);
6811 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
/// Emits the body of a combined 'target teams distribute simd' region for
/// \p S.
///
/// CodeGenDistribute emits the distribute loop with
/// emitOMPLoopBodyWithStopPoint as the loop body emitter and S.getInc() as the
/// increment. The teams generator initializes reductions in a private scope,
/// emits an inlined OMPD_distribute directive and finalizes the reductions.
/// The result is run through emitCommonOMPTeamsDirective (innermost kind
/// OMPD_distribute_simd), followed by the reduction post-update step.
6814 static void emitTargetTeamsDistributeSimdRegion(
6815 CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6816 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6818 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6819 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6822 // Emit teams region as a standalone region.
6823 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6824 PrePostActionTy
&Action
) {
6826 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6827 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6828 (void)PrivateScope
.Privatize();
6829 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6831 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6833 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_simd
, CodeGen
);
6834 emitPostUpdateForReductionClause(CGF
, S
,
6835 [](CodeGenFunction
&) { return nullptr; });
/// Emits the outlined function for the 'target teams distribute simd'
/// directive \p S as an offload entry (IsOffloadEntry is passed as true),
/// using emitTargetTeamsDistributeSimdRegion as the body generator.
/// \p ParentName is forwarded unchanged to emitTargetOutlinedFunction. Asserts
/// that both the function and its address were produced.
6838 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6839 CodeGenModule
&CGM
, StringRef ParentName
,
6840 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6841 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6842 emitTargetTeamsDistributeSimdRegion(CGF
, Action
, S
);
6845 llvm::Constant
*Addr
;
6846 // Emit target region as a standalone region.
6847 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6848 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6849 assert(Fn
&& Addr
&& "Target device function emission failed.");
/// Emits a 'target teams distribute simd' directive: wraps
/// emitTargetTeamsDistributeSimdRegion in a body generator and hands it to
/// emitCommonOMPTargetDirective.
6852 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6853 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6854 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6855 emitTargetTeamsDistributeSimdRegion(CGF
, Action
, S
);
6857 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
/// Emits a 'teams distribute' directive.
///
/// CodeGenDistribute emits the distribute loop with
/// emitOMPLoopBodyWithStopPoint as the loop body emitter and S.getInc() as the
/// increment. The teams generator initializes reductions in a private scope,
/// emits an inlined OMPD_distribute directive and finalizes the reductions.
/// The region goes through emitCommonOMPTeamsDirective (innermost kind
/// OMPD_distribute), followed by the reduction post-update step.
6860 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6861 const OMPTeamsDistributeDirective
&S
) {
6863 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6864 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6867 // Emit teams region as a standalone region.
6868 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6869 PrePostActionTy
&Action
) {
6871 OMPPrivateScope
PrivateScope(CGF
);
6872 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6873 (void)PrivateScope
.Privatize();
6874 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6876 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6878 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute
, CodeGen
);
6879 emitPostUpdateForReductionClause(*this, S
,
6880 [](CodeGenFunction
&) { return nullptr; });
/// Emits a 'teams distribute simd' directive.
///
/// CodeGenDistribute emits the distribute loop with
/// emitOMPLoopBodyWithStopPoint as the loop body emitter and S.getInc() as the
/// increment. The teams generator initializes reductions in a private scope,
/// emits an inlined directive of kind OMPD_simd and finalizes the reductions.
/// The region goes through emitCommonOMPTeamsDirective (innermost kind
/// OMPD_distribute_simd), followed by the reduction post-update step.
6883 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6884 const OMPTeamsDistributeSimdDirective
&S
) {
6885 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6886 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6889 // Emit teams region as a standalone region.
6890 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6891 PrePostActionTy
&Action
) {
6893 OMPPrivateScope
PrivateScope(CGF
);
6894 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6895 (void)PrivateScope
.Privatize();
6896 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_simd
,
6898 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6900 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_simd
, CodeGen
);
6901 emitPostUpdateForReductionClause(*this, S
,
6902 [](CodeGenFunction
&) { return nullptr; });
/// Emits a 'teams distribute parallel for' directive.
///
/// CodeGenDistribute emits the distribute loop with
/// emitInnerParallelForWhenCombined as the loop body emitter (the remaining
/// argument is on a line that is not part of this listing). The teams
/// generator initializes reductions in a private scope, emits an inlined
/// OMPD_distribute directive and finalizes the reductions. The region goes
/// through emitCommonOMPTeamsDirective (innermost kind
/// OMPD_distribute_parallel_for), followed by the reduction post-update step.
6905 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6906 const OMPTeamsDistributeParallelForDirective
&S
) {
6907 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6908 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
6912 // Emit teams region as a standalone region.
6913 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6914 PrePostActionTy
&Action
) {
6916 OMPPrivateScope
PrivateScope(CGF
);
6917 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6918 (void)PrivateScope
.Privatize();
6919 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6921 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6923 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_parallel_for
, CodeGen
);
6924 emitPostUpdateForReductionClause(*this, S
,
6925 [](CodeGenFunction
&) { return nullptr; });
/// Emits a 'teams distribute parallel for simd' directive.
///
/// CodeGenDistribute emits the distribute loop with
/// emitInnerParallelForWhenCombined as the loop body emitter (the remaining
/// argument is on a line that is not part of this listing). The teams
/// generator initializes reductions in a private scope, emits an inlined
/// OMPD_distribute directive without cancellation (HasCancel=false) and
/// finalizes the reductions. The region goes through
/// emitCommonOMPTeamsDirective (innermost kind
/// OMPD_distribute_parallel_for_simd), followed by the reduction post-update
/// step.
6928 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
6929 const OMPTeamsDistributeParallelForSimdDirective
&S
) {
6930 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6931 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
6935 // Emit teams region as a standalone region.
6936 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6937 PrePostActionTy
&Action
) {
6939 OMPPrivateScope
PrivateScope(CGF
);
6940 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6941 (void)PrivateScope
.Privatize();
6942 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
6943 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
6944 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6946 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_parallel_for_simd
,
6948 emitPostUpdateForReductionClause(*this, S
,
6949 [](CodeGenFunction
&) { return nullptr; });
/// Emits an 'interop' directive through the OpenMPIRBuilder.
///
/// The optional 'device' clause expression is evaluated to a scalar. An
/// optional 'depend' clause is lowered by the runtime's emitDependClause into
/// a dependence count and an address that is cast to Int8PtrTy. Then at most
/// one of the 'init', 'destroy' or 'use' clauses is handled (checked in that
/// order), each operating on the address of the clause's interop variable and
/// receiving whether a 'nowait' clause is present.
6952 void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective
&S
) {
6953 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
6954 llvm::Value
*Device
= nullptr;
6955 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
6956 Device
= EmitScalarExpr(C
->getDevice());
6958 llvm::Value
*NumDependences
= nullptr;
6959 llvm::Value
*DependenceAddress
= nullptr;
6960 if (const auto *DC
= S
.getSingleClause
<OMPDependClause
>()) {
6961 OMPTaskDataTy::DependData
Dependencies(DC
->getDependencyKind(),
6963 Dependencies
.DepExprs
.append(DC
->varlist_begin(), DC
->varlist_end());
6964 std::pair
<llvm::Value
*, Address
> DependencePair
=
6965 CGM
.getOpenMPRuntime().emitDependClause(*this, Dependencies
,
6967 NumDependences
= DependencePair
.first
;
6968 DependenceAddress
= Builder
.CreatePointerCast(
6969 DependencePair
.second
.getPointer(), CGM
.Int8PtrTy
);
// A 'nowait' clause is only expected together with an init, destroy or use
// clause.
6972 assert(!(S
.hasClausesOfKind
<OMPNowaitClause
>() &&
6973 !(S
.getSingleClause
<OMPInitClause
>() ||
6974 S
.getSingleClause
<OMPDestroyClause
>() ||
6975 S
.getSingleClause
<OMPUseClause
>())) &&
6976 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
6978 if (const auto *C
= S
.getSingleClause
<OMPInitClause
>()) {
6979 llvm::Value
*InteropvarPtr
=
6980 EmitLValue(C
->getInteropVar()).getPointer(*this);
// The interop type is Target when the clause says so; the assert below
// expects targetsync for the other assignment (the line separating the two
// cases is not part of this listing).
6981 llvm::omp::OMPInteropType InteropType
= llvm::omp::OMPInteropType::Unknown
;
6982 if (C
->getIsTarget()) {
6983 InteropType
= llvm::omp::OMPInteropType::Target
;
6985 assert(C
->getIsTargetSync() && "Expected interop-type target/targetsync");
6986 InteropType
= llvm::omp::OMPInteropType::TargetSync
;
6988 OMPBuilder
.createOMPInteropInit(Builder
, InteropvarPtr
, InteropType
, Device
,
6989 NumDependences
, DependenceAddress
,
6990 S
.hasClausesOfKind
<OMPNowaitClause
>());
6991 } else if (const auto *C
= S
.getSingleClause
<OMPDestroyClause
>()) {
6992 llvm::Value
*InteropvarPtr
=
6993 EmitLValue(C
->getInteropVar()).getPointer(*this);
6994 OMPBuilder
.createOMPInteropDestroy(Builder
, InteropvarPtr
, Device
,
6995 NumDependences
, DependenceAddress
,
6996 S
.hasClausesOfKind
<OMPNowaitClause
>());
6997 } else if (const auto *C
= S
.getSingleClause
<OMPUseClause
>()) {
6998 llvm::Value
*InteropvarPtr
=
6999 EmitLValue(C
->getInteropVar()).getPointer(*this);
7000 OMPBuilder
.createOMPInteropUse(Builder
, InteropvarPtr
, Device
,
7001 NumDependences
, DependenceAddress
,
7002 S
.hasClausesOfKind
<OMPNowaitClause
>());
/// Emits the body of a combined 'target teams distribute parallel for' region
/// for \p S.
///
/// CodeGenDistribute emits the distribute loop with
/// emitInnerParallelForWhenCombined as the loop body emitter (the remaining
/// argument is on a line that is not part of this listing). CodeGenTeams
/// initializes reductions in a private scope, emits an inlined
/// OMPD_distribute directive without cancellation (HasCancel=false) and
/// finalizes the reductions. The region goes through
/// emitCommonOMPTeamsDirective (innermost kind OMPD_distribute_parallel_for),
/// followed by the reduction post-update step.
7006 static void emitTargetTeamsDistributeParallelForRegion(
7007 CodeGenFunction
&CGF
, const OMPTargetTeamsDistributeParallelForDirective
&S
,
7008 PrePostActionTy
&Action
) {
7010 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7011 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
7015 // Emit teams region as a standalone region.
7016 auto &&CodeGenTeams
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
7017 PrePostActionTy
&Action
) {
7019 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
7020 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
7021 (void)PrivateScope
.Privatize();
7022 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
7023 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
7024 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
7027 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_parallel_for
,
7029 emitPostUpdateForReductionClause(CGF
, S
,
7030 [](CodeGenFunction
&) { return nullptr; });
/// Emits the outlined function for the 'target teams distribute parallel for'
/// directive \p S as an offload entry (IsOffloadEntry is passed as true),
/// using emitTargetTeamsDistributeParallelForRegion as the body generator.
/// \p ParentName is forwarded unchanged to emitTargetOutlinedFunction. Asserts
/// that both the function and its address were produced.
7033 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7034 CodeGenModule
&CGM
, StringRef ParentName
,
7035 const OMPTargetTeamsDistributeParallelForDirective
&S
) {
7036 // Emit SPMD target teams distribute parallel for region as a standalone
7038 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7039 emitTargetTeamsDistributeParallelForRegion(CGF
, S
, Action
);
7042 llvm::Constant
*Addr
;
7043 // Emit target region as a standalone region.
7044 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7045 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7046 assert(Fn
&& Addr
&& "Target device function emission failed.");
/// Emits a 'target teams distribute parallel for' directive: wraps
/// emitTargetTeamsDistributeParallelForRegion in a body generator and hands it
/// to emitCommonOMPTargetDirective.
7049 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7050 const OMPTargetTeamsDistributeParallelForDirective
&S
) {
7051 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7052 emitTargetTeamsDistributeParallelForRegion(CGF
, S
, Action
);
7054 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
/// Emits the body of a combined 'target teams distribute parallel for simd'
/// region for \p S.
///
/// CodeGenDistribute emits the distribute loop with
/// emitInnerParallelForWhenCombined as the loop body emitter (the remaining
/// argument is on a line that is not part of this listing). CodeGenTeams
/// initializes reductions in a private scope, emits an inlined
/// OMPD_distribute directive without cancellation (HasCancel=false) and
/// finalizes the reductions. The region goes through
/// emitCommonOMPTeamsDirective (innermost kind
/// OMPD_distribute_parallel_for_simd), followed by the reduction post-update
/// step.
7057 static void emitTargetTeamsDistributeParallelForSimdRegion(
7058 CodeGenFunction
&CGF
,
7059 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
,
7060 PrePostActionTy
&Action
) {
7062 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7063 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
7067 // Emit teams region as a standalone region.
7068 auto &&CodeGenTeams
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
7069 PrePostActionTy
&Action
) {
7071 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
7072 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
7073 (void)PrivateScope
.Privatize();
7074 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
7075 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
7076 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
7079 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_parallel_for_simd
,
7081 emitPostUpdateForReductionClause(CGF
, S
,
7082 [](CodeGenFunction
&) { return nullptr; });
/// Emits the outlined function for the 'target teams distribute parallel for
/// simd' directive \p S as an offload entry (IsOffloadEntry is passed as
/// true), using emitTargetTeamsDistributeParallelForSimdRegion as the body
/// generator. \p ParentName is forwarded unchanged to
/// emitTargetOutlinedFunction. Asserts that both the function and its address
/// were produced.
7085 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7086 CodeGenModule
&CGM
, StringRef ParentName
,
7087 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
) {
7088 // Emit SPMD target teams distribute parallel for simd region as a standalone
7090 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7091 emitTargetTeamsDistributeParallelForSimdRegion(CGF
, S
, Action
);
7094 llvm::Constant
*Addr
;
7095 // Emit target region as a standalone region.
7096 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7097 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7098 assert(Fn
&& Addr
&& "Target device function emission failed.");
/// Emits a 'target teams distribute parallel for simd' directive: wraps
/// emitTargetTeamsDistributeParallelForSimdRegion in a body generator and
/// hands it to emitCommonOMPTargetDirective.
7101 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7102 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
) {
7103 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7104 emitTargetTeamsDistributeParallelForSimdRegion(CGF
, S
, Action
);
7106 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
/// Emits a 'cancellation point' directive by delegating to the runtime's
/// emitCancellationPointCall with the directive's begin location and cancel
/// region.
7109 void CodeGenFunction::EmitOMPCancellationPointDirective(
7110 const OMPCancellationPointDirective
&S
) {
7111 CGM
.getOpenMPRuntime().emitCancellationPointCall(*this, S
.getBeginLoc(),
7112 S
.getCancelRegion());
/// Emits a 'cancel' directive.
///
/// The condition is taken from an 'if' clause that has no name modifier or the
/// 'cancel' modifier. When the OpenMPIRBuilder language option is enabled and
/// the cancel region is parallel, sections or section, the cancel is created
/// with OMPBuilder.createCancel and the function returns after restoring the
/// builder's insert point. Otherwise the runtime's emitCancelCall is used.
7115 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective
&S
) {
7116 const Expr
*IfCond
= nullptr;
7117 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
7118 if (C
->getNameModifier() == OMPD_unknown
||
7119 C
->getNameModifier() == OMPD_cancel
) {
7120 IfCond
= C
->getCondition();
7124 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
7125 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
7126 // TODO: This check is necessary as we only generate `omp parallel` through
7127 // the OpenMPIRBuilder for now.
7128 if (S
.getCancelRegion() == OMPD_parallel
||
7129 S
.getCancelRegion() == OMPD_sections
||
7130 S
.getCancelRegion() == OMPD_section
) {
7131 llvm::Value
*IfCondition
= nullptr;
// NOTE(review): the guard for this assignment is not part of this listing;
// presumably it only runs when IfCond is non-null - confirm.
7133 IfCondition
= EmitScalarExpr(IfCond
,
7134 /*IgnoreResultAssign=*/true);
7135 return Builder
.restoreIP(
7136 OMPBuilder
.createCancel(Builder
, IfCondition
, S
.getCancelRegion()));
7140 CGM
.getOpenMPRuntime().emitCancelCall(*this, S
.getBeginLoc(), IfCond
,
7141 S
.getCancelRegion());
/// Returns the jump destination used when a region of directive kind \p Kind
/// is cancelled.
///
/// The first condition singles out parallel, task, target parallel and the
/// taskloop variants; the statement it guards is not part of this listing.
/// Every other kind must be one of the worksharing kinds listed in the assert
/// and yields the exit block of OMPCancelStack.
7144 CodeGenFunction::JumpDest
7145 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind
) {
7146 if (Kind
== OMPD_parallel
|| Kind
== OMPD_task
||
7147 Kind
== OMPD_target_parallel
|| Kind
== OMPD_taskloop
||
7148 Kind
== OMPD_master_taskloop
|| Kind
== OMPD_parallel_master_taskloop
)
7150 assert(Kind
== OMPD_for
|| Kind
== OMPD_section
|| Kind
== OMPD_sections
||
7151 Kind
== OMPD_parallel_sections
|| Kind
== OMPD_parallel_for
||
7152 Kind
== OMPD_distribute_parallel_for
||
7153 Kind
== OMPD_target_parallel_for
||
7154 Kind
== OMPD_teams_distribute_parallel_for
||
7155 Kind
== OMPD_target_teams_distribute_parallel_for
);
7156 return OMPCancelStack
.getExitBlock();
/// Privatizes the list items of a use_device_ptr clause.
///
/// \param C                    The clause; its variable list, inits and
///                             private copies are walked together.
/// \param PrivateScope         Scope in which each original variable is
///                             registered with the address of its private copy.
/// \param CaptureDeviceAddrMap Addresses keyed by declaration; items without
///                             an entry are not initialized from the map.
///
/// For each item found in the map, the init variable is bound to the mapped
/// address (cast to the memory type of the original variable's non-reference
/// type), and is removed from LocalDeclMap again after use.
7159 void CodeGenFunction::EmitOMPUseDevicePtrClause(
7160 const OMPUseDevicePtrClause
&C
, OMPPrivateScope
&PrivateScope
,
7161 const llvm::DenseMap
<const ValueDecl
*, Address
> &CaptureDeviceAddrMap
) {
7162 auto OrigVarIt
= C
.varlist_begin();
7163 auto InitIt
= C
.inits().begin();
7164 for (const Expr
*PvtVarIt
: C
.private_copies()) {
7165 const auto *OrigVD
=
7166 cast
<VarDecl
>(cast
<DeclRefExpr
>(*OrigVarIt
)->getDecl());
7167 const auto *InitVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*InitIt
)->getDecl());
7168 const auto *PvtVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(PvtVarIt
)->getDecl());
7170 // In order to identify the right initializer we need to match the
7171 // declaration used by the mapping logic. In some cases we may get
7172 // OMPCapturedExprDecl that refers to the original declaration.
7173 const ValueDecl
*MatchingVD
= OrigVD
;
7174 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(MatchingVD
)) {
7175 // OMPCapturedExprDecl are used to privatize fields of the current
7177 const auto *ME
= cast
<MemberExpr
>(OED
->getInit());
7178 assert(isa
<CXXThisExpr
>(ME
->getBase()->IgnoreImpCasts()) &&
7179 "Base should be the current struct!");
7180 MatchingVD
= ME
->getMemberDecl();
7183 // If we don't have information about the current list item, move on to
7185 auto InitAddrIt
= CaptureDeviceAddrMap
.find(MatchingVD
);
7186 if (InitAddrIt
== CaptureDeviceAddrMap
.end())
7189 // Initialize the temporary initialization variable with the address
7190 // we get from the runtime library. We have to cast the source address
7191 // because it is always a void *. References are materialized in the
7192 // privatization scope, so the initialization here disregards the fact
7193 // the original variable is a reference.
7194 llvm::Type
*Ty
= ConvertTypeForMem(OrigVD
->getType().getNonReferenceType());
7195 Address InitAddr
= Builder
.CreateElementBitCast(InitAddrIt
->second
, Ty
);
7196 setAddrOfLocalVar(InitVD
, InitAddr
);
7198 // Emit private declaration, it will be initialized by the value we
7199 // declaration we just added to the local declarations map.
7202 // The initialization variables reached its purpose in the emission
7203 // of the previous declaration, so we don't need it anymore.
7204 LocalDeclMap
.erase(InitVD
);
7206 // Return the address of the private variable.
7208 PrivateScope
.addPrivate(OrigVD
, GetAddrOfLocalVar(PvtVD
));
7209 assert(IsRegistered
&& "firstprivate var already registered as private");
7210 // Silence the warning about unused variable.
/// Returns the variable at the base of \p Ref.
///
/// Parentheses and implicit casts are ignored at every step. Array sections
/// are peeled first, then array subscripts. The final cast<> calls require
/// what remains to be a DeclRefExpr that names a VarDecl.
7218 static const VarDecl
*getBaseDecl(const Expr
*Ref
) {
7219 const Expr
*Base
= Ref
->IgnoreParenImpCasts();
7220 while (const auto *OASE
= dyn_cast
<OMPArraySectionExpr
>(Base
))
7221 Base
= OASE
->getBase()->IgnoreParenImpCasts();
7222 while (const auto *ASE
= dyn_cast
<ArraySubscriptExpr
>(Base
))
7223 Base
= ASE
->getBase()->IgnoreParenImpCasts();
7224 return cast
<VarDecl
>(cast
<DeclRefExpr
>(Base
)->getDecl());
/// Privatizes the list items of a use_device_addr clause.
///
/// \param C                    The clause whose variable list is walked.
/// \param PrivateScope         Scope in which each base variable is registered
///                             with its device address.
/// \param CaptureDeviceAddrMap Addresses keyed by declaration.
///
/// Each base declaration (see getBaseDecl) is inserted into the Processed set,
/// which records the canonical declarations already seen. When the list item
/// is a plain DeclRefExpr or the matched declaration has array type, the
/// mapped address is reinterpreted as a pointer to the original non-reference
/// type and loaded before being registered.
7227 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7228 const OMPUseDeviceAddrClause
&C
, OMPPrivateScope
&PrivateScope
,
7229 const llvm::DenseMap
<const ValueDecl
*, Address
> &CaptureDeviceAddrMap
) {
7230 llvm::SmallDenseSet
<CanonicalDeclPtr
<const Decl
>, 4> Processed
;
7231 for (const Expr
*Ref
: C
.varlists()) {
7232 const VarDecl
*OrigVD
= getBaseDecl(Ref
);
7233 if (!Processed
.insert(OrigVD
).second
)
7235 // In order to identify the right initializer we need to match the
7236 // declaration used by the mapping logic. In some cases we may get
7237 // OMPCapturedExprDecl that refers to the original declaration.
7238 const ValueDecl
*MatchingVD
= OrigVD
;
7239 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(MatchingVD
)) {
7240 // OMPCapturedExprDecl are used to privatize fields of the current
7242 const auto *ME
= cast
<MemberExpr
>(OED
->getInit());
// NOTE(review): unlike the equivalent check in EmitOMPUseDevicePtrClause, this
// assert inspects getBase() without IgnoreImpCasts() - confirm that the
// difference is intended.
7243 assert(isa
<CXXThisExpr
>(ME
->getBase()) &&
7244 "Base should be the current struct!");
7245 MatchingVD
= ME
->getMemberDecl();
7248 // If we don't have information about the current list item, move on to
7250 auto InitAddrIt
= CaptureDeviceAddrMap
.find(MatchingVD
);
7251 if (InitAddrIt
== CaptureDeviceAddrMap
.end())
7254 Address PrivAddr
= InitAddrIt
->getSecond();
7255 // For declrefs and variable length array need to load the pointer for
7256 // correct mapping, since the pointer to the data was passed to the runtime.
7257 if (isa
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts()) ||
7258 MatchingVD
->getType()->isArrayType()) {
7259 QualType PtrTy
= getContext().getPointerType(
7260 OrigVD
->getType().getNonReferenceType());
7261 PrivAddr
= EmitLoadOfPointer(
7262 Builder
.CreateElementBitCast(PrivAddr
, ConvertTypeForMem(PtrTy
)),
7263 PtrTy
->castAs
<PointerType
>());
7266 (void)PrivateScope
.addPrivate(OrigVD
, PrivAddr
);
7270 // Generate the instructions for '#pragma omp target data' directive.
//
// Overview of the visible structure:
//  - Info is a TargetDataInfo created with device-pointer info and separate
//    begin/end calls requested; its CaptureDeviceAddrMap feeds the
//    use_device_ptr / use_device_addr privatization below.
//  - PrivAction is a pre/post action whose Enter() sets
//    PrivatizeDevicePointers to true.
//  - CodeGen emits the region body as an inlined OMPD_target_data directive,
//    optionally privatizing device pointers first.
//  - The 'if' and 'device' clause expressions are collected and passed to the
//    runtime's emitTargetDataCalls together with the region generator.
7271 void CodeGenFunction::EmitOMPTargetDataDirective(
7272 const OMPTargetDataDirective
&S
) {
7273 CGOpenMPRuntime::TargetDataInfo
Info(/*RequiresDevicePointerInfo=*/true,
7274 /*SeparateBeginEndCalls=*/true);
7276 // Create a pre/post action to signal the privatization of the device pointer.
7277 // This action can be replaced by the OpenMP runtime code generation to
7278 // deactivate privatization.
7279 bool PrivatizeDevicePointers
= false;
7280 class DevicePointerPrivActionTy
: public PrePostActionTy
{
7281 bool &PrivatizeDevicePointers
;
7284 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers
)
7285 : PrivatizeDevicePointers(PrivatizeDevicePointers
) {}
7286 void Enter(CodeGenFunction
&CGF
) override
{
7287 PrivatizeDevicePointers
= true;
7290 DevicePointerPrivActionTy
PrivAction(PrivatizeDevicePointers
);
7292 auto &&CodeGen
= [&](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
// Innermost generator: emits the statement captured by the directive.
7293 auto &&InnermostCodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7294 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
7297 // Codegen that selects whether to generate the privatization code or not.
7298 auto &&PrivCodeGen
= [&](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7299 RegionCodeGenTy
RCG(InnermostCodeGen
);
// Reset the flag; it is re-enabled only if the action's Enter() runs.
7300 PrivatizeDevicePointers
= false;
7302 // Call the pre-action to change the status of PrivatizeDevicePointers if
7306 if (PrivatizeDevicePointers
) {
7307 OMPPrivateScope
PrivateScope(CGF
);
7308 // Emit all instances of the use_device_ptr clause.
7309 for (const auto *C
: S
.getClausesOfKind
<OMPUseDevicePtrClause
>())
7310 CGF
.EmitOMPUseDevicePtrClause(*C
, PrivateScope
,
7311 Info
.CaptureDeviceAddrMap
);
7312 for (const auto *C
: S
.getClausesOfKind
<OMPUseDeviceAddrClause
>())
7313 CGF
.EmitOMPUseDeviceAddrClause(*C
, PrivateScope
,
7314 Info
.CaptureDeviceAddrMap
);
7315 (void)PrivateScope
.Privatize();
7318 // If we don't have target devices, don't bother emitting the data
7320 std::optional
<OpenMPDirectiveKind
> CaptureRegion
;
7321 if (CGM
.getLangOpts().OMPTargetTriples
.empty()) {
7322 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7323 for (const auto *C
: S
.getClausesOfKind
<OMPUseDevicePtrClause
>())
7324 for (const Expr
*E
: C
->varlists()) {
7325 const Decl
*D
= cast
<DeclRefExpr
>(E
)->getDecl();
7326 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(D
))
7327 CGF
.EmitVarDecl(*OED
);
7329 for (const auto *C
: S
.getClausesOfKind
<OMPUseDeviceAddrClause
>())
7330 for (const Expr
*E
: C
->varlists()) {
7331 const Decl
*D
= getBaseDecl(E
);
7332 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(D
))
7333 CGF
.EmitVarDecl(*OED
);
7336 CaptureRegion
= OMPD_unknown
;
7339 OMPLexicalScope
Scope(CGF
, S
, CaptureRegion
);
7344 // Forward the provided action to the privatization codegen.
7345 RegionCodeGenTy
PrivRCG(PrivCodeGen
);
7346 PrivRCG
.setAction(Action
);
7348 // Notwithstanding the body of the region is emitted as inlined directive,
7349 // we don't use an inline scope as changes in the references inside the
7350 // region are expected to be visible outside, so we do not privatize them.
7351 OMPLexicalScope
Scope(CGF
, S
);
7352 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_target_data
,
7356 RegionCodeGenTy
RCG(CodeGen
);
7358 // If we don't have target devices, don't bother emitting the data mapping
7360 if (CGM
.getLangOpts().OMPTargetTriples
.empty()) {
7365 // Check if we have any if clause associated with the directive.
7366 const Expr
*IfCond
= nullptr;
7367 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
7368 IfCond
= C
->getCondition();
7370 // Check if we have any device clause associated with the directive.
7371 const Expr
*Device
= nullptr;
7372 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
7373 Device
= C
->getDevice();
7375 // Set the action to signal privatization of device pointers.
7376 RCG
.setAction(PrivAction
);
7378 // Emit region code.
7379 CGM
.getOpenMPRuntime().emitTargetDataCalls(*this, S
, IfCond
, Device
, RCG
,
/// Emits a 'target enter data' directive.
///
/// The directive is checked against an empty OMPTargetTriples list first (the
/// guarded statement is not part of this listing; per the comment below, no
/// data mapping is emitted in that case). Otherwise the optional 'if' and
/// 'device' clause expressions are collected and passed to the runtime's
/// emitTargetDataStandAloneCall inside an OMPD_task lexical scope.
7383 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7384 const OMPTargetEnterDataDirective
&S
) {
7385 // If we don't have target devices, don't bother emitting the data mapping
7387 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
7390 // Check if we have any if clause associated with the directive.
7391 const Expr
*IfCond
= nullptr;
7392 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
7393 IfCond
= C
->getCondition();
7395 // Check if we have any device clause associated with the directive.
7396 const Expr
*Device
= nullptr;
7397 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
7398 Device
= C
->getDevice();
7400 OMPLexicalScope
Scope(*this, S
, OMPD_task
);
7401 CGM
.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S
, IfCond
, Device
);
// Emits code for the OpenMP 'target exit data' directive. Collects the
// optional 'if' and 'device' clause expressions (left null when the clause is
// absent) and hands them to the OpenMP runtime's stand-alone data call.
7404 void CodeGenFunction::EmitOMPTargetExitDataDirective(
7405 const OMPTargetExitDataDirective
&S
) {
7406 // If we don't have target devices, don't bother emitting the data mapping code.
// NOTE(review): the statement guarded by this check is not visible in this
// excerpt; presumably an early return -- confirm against the full file.
7408 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
7411 // Check if we have any if clause associated with the directive.
7412 const Expr
*IfCond
= nullptr;
7413 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
7414 IfCond
= C
->getCondition();
7416 // Check if we have any device clause associated with the directive.
7417 const Expr
*Device
= nullptr;
7418 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
7419 Device
= C
->getDevice();
// Open the directive's lexical scope (OMPD_task is passed as the region kind)
// and let the runtime emit the stand-alone call with the collected clauses.
7421 OMPLexicalScope
Scope(*this, S
, OMPD_task
);
7422 CGM
.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S
, IfCond
, Device
);
// Emits the body of a 'target parallel' directive: the 'parallel' captured
// statement is emitted through emitCommonOMPParallelDirective with
// privatization and reduction handling wrapped around it.
// NOTE(review): some statements of this function are not visible in this
// excerpt (e.g. any use of \p Action); the comments cover visible code only.
7425 static void emitTargetParallelRegion(CodeGenFunction
&CGF
,
7426 const OMPTargetParallelDirective
&S
,
7427 PrePostActionTy
&Action
) {
7428 // Get the captured statement associated with the 'parallel' region.
7429 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_parallel
);
// Body generator passed to the common 'parallel' emission below.
7431 auto &&CodeGen
= [&S
, CS
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
// Register firstprivate, private and reduction variables in one private
// scope, then activate it.
7433 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
7434 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
7435 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
7436 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
7437 (void)PrivateScope
.Privatize();
// For target execution directives, let the runtime adjust lambda-related
// target data before the body is emitted.
7438 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
7439 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
7440 // TODO: Add support for clauses.
// Emit the captured statement itself, then finalize the reductions.
7441 CGF
.EmitStmt(CS
->getCapturedStmt());
7442 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
// Emit the 'parallel' part with empty bound parameters.
7444 emitCommonOMPParallelDirective(CGF
, S
, OMPD_parallel
, CodeGen
,
7445 emitEmptyBoundParameters
);
// Post-update for reductions; the callback supplies a null condition.
7446 emitPostUpdateForReductionClause(CGF
, S
,
7447 [](CodeGenFunction
&) { return nullptr; });
// Emits the outlined device function for a 'target parallel' directive and
// registers it as an offload entry (IsOffloadEntry is true). The region body
// is generated by emitTargetParallelRegion.
// NOTE(review): the declaration of Fn is not visible in this excerpt.
7450 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7451 CodeGenModule
&CGM
, StringRef ParentName
,
7452 const OMPTargetParallelDirective
&S
) {
// Region body generator: forwards to the shared 'target parallel' emitter.
7453 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7454 emitTargetParallelRegion(CGF
, S
, Action
);
7457 llvm::Constant
*Addr
;
7458 // Emit target region as a standalone region.
7459 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7460 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
// Both outputs are expected to be set by the runtime call above.
7461 assert(Fn
&& Addr
&& "Target device function emission failed.");
// Emits code for the 'target parallel' directive: wraps
// emitTargetParallelRegion in a region generator and passes it to the common
// target-directive emission.
7464 void CodeGenFunction::EmitOMPTargetParallelDirective(
7465 const OMPTargetParallelDirective
&S
) {
// Region body generator: forwards to the shared 'target parallel' emitter.
7466 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7467 emitTargetParallelRegion(CGF
, S
, Action
);
7469 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
// Emits the body of a 'target parallel for' directive as a 'parallel' region
// whose body is a worksharing loop.
// NOTE(review): some statements of this function are not visible in this
// excerpt (e.g. any use of \p Action); the comments cover visible code only.
7472 static void emitTargetParallelForRegion(CodeGenFunction
&CGF
,
7473 const OMPTargetParallelForDirective
&S
,
7474 PrePostActionTy
&Action
) {
7476 // Emit directive as a combined directive that consists of two implicit
7477 // directives: 'parallel' with 'for' directive.
7478 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
// Set up the cancel stack for this directive kind; S.hasCancel() tells it
// whether the region contains a cancel.
7480 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
7481 CGF
, OMPD_target_parallel_for
, S
.hasCancel());
// Emit the worksharing loop with the 'for' bound helpers.
7482 CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(), emitForLoopBounds
,
7483 emitDispatchForLoopBounds
);
// Emit the enclosing 'parallel' with OMPD_for as the innermost kind and
// empty bound parameters.
7485 emitCommonOMPParallelDirective(CGF
, S
, OMPD_for
, CodeGen
,
7486 emitEmptyBoundParameters
);
// Emits the outlined device function for a 'target parallel for' directive
// and registers it as an offload entry (IsOffloadEntry is true). The region
// body is generated by emitTargetParallelForRegion.
// NOTE(review): the declaration of Fn is not visible in this excerpt.
7489 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7490 CodeGenModule
&CGM
, StringRef ParentName
,
7491 const OMPTargetParallelForDirective
&S
) {
7492 // Emit SPMD target parallel for region as a standalone region.
7493 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7494 emitTargetParallelForRegion(CGF
, S
, Action
);
7497 llvm::Constant
*Addr
;
7498 // Emit target region as a standalone region.
7499 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7500 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
// Both outputs are expected to be set by the runtime call above.
7501 assert(Fn
&& Addr
&& "Target device function emission failed.");
// Emits code for the 'target parallel for' directive: wraps
// emitTargetParallelForRegion in a region generator and passes it to the
// common target-directive emission.
7504 void CodeGenFunction::EmitOMPTargetParallelForDirective(
7505 const OMPTargetParallelForDirective
&S
) {
// Region body generator: forwards to the shared 'target parallel for' emitter.
7506 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7507 emitTargetParallelForRegion(CGF
, S
, Action
);
7509 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
// Emits the body of a 'target parallel for simd' directive as a 'parallel'
// region whose body is a worksharing loop.
// NOTE(review): the line carrying this function's return type and storage
// class is not visible in this excerpt, nor is any use of \p Action.
7513 emitTargetParallelForSimdRegion(CodeGenFunction
&CGF
,
7514 const OMPTargetParallelForSimdDirective
&S
,
7515 PrePostActionTy
&Action
) {
7517 // Emit directive as a combined directive that consists of two implicit
7518 // directives: 'parallel' with 'for' directive.
7519 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
// Emit the worksharing loop with the 'for' bound helpers.
7521 CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(), emitForLoopBounds
,
7522 emitDispatchForLoopBounds
);
// Emit the enclosing 'parallel' with OMPD_simd as the innermost kind and
// empty bound parameters.
7524 emitCommonOMPParallelDirective(CGF
, S
, OMPD_simd
, CodeGen
,
7525 emitEmptyBoundParameters
);
// Emits the outlined device function for a 'target parallel for simd'
// directive and registers it as an offload entry (IsOffloadEntry is true).
// The region body is generated by emitTargetParallelForSimdRegion.
// NOTE(review): the declaration of Fn is not visible in this excerpt.
7528 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7529 CodeGenModule
&CGM
, StringRef ParentName
,
7530 const OMPTargetParallelForSimdDirective
&S
) {
7531 // Emit SPMD target parallel for region as a standalone region.
7532 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7533 emitTargetParallelForSimdRegion(CGF
, S
, Action
);
7536 llvm::Constant
*Addr
;
7537 // Emit target region as a standalone region.
7538 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7539 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
// Both outputs are expected to be set by the runtime call above.
7540 assert(Fn
&& Addr
&& "Target device function emission failed.");
// Emits code for the 'target parallel for simd' directive: wraps
// emitTargetParallelForSimdRegion in a region generator and passes it to the
// common target-directive emission.
7543 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7544 const OMPTargetParallelForSimdDirective
&S
) {
// Region body generator: forwards to the shared region emitter.
7545 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7546 emitTargetParallelForSimdRegion(CGF
, S
, Action
);
7548 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
7551 /// Map the variable referenced by \p Helper to the local address of the
/// implicit parameter \p PVD by registering it in the private scope
/// \p Privates. Nothing is returned; the mapping is recorded in \p Privates.
7552 static void mapParam(CodeGenFunction
&CGF
, const DeclRefExpr
*Helper
,
7553 const ImplicitParamDecl
*PVD
,
7554 CodeGenFunction::OMPPrivateScope
&Privates
) {
// The helper expression must refer to a VarDecl (cast<> asserts otherwise).
7555 const auto *VDecl
= cast
<VarDecl
>(Helper
->getDecl());
7556 Privates
.addPrivate(VDecl
, CGF
.GetAddrOfLocalVar(PVD
));
// Shared code generation for taskloop-based directives (the directive kind is
// asserted with isOpenMPTaskLoopDirective). It builds the captured-statement
// argument, gathers clause information ('if', 'nogroup', 'grainsize',
// 'num_tasks') and emits the task through EmitOMPTaskBasedDirective, using
// BodyGen for the loop body and TaskGen for the runtime taskloop call.
// NOTE(review): several lines of this function are not visible in this
// excerpt, including the declarations of Data and CondConstant and the
// condition that selects between the direct and taskgroup-wrapped emission at
// the end; the comments below describe the visible statements only.
7559 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective
&S
) {
7560 assert(isOpenMPTaskLoopDirective(S
.getDirectiveKind()));
7561 // Emit outlined function for task construct.
7562 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_taskloop
);
// Starts out invalid and is filled in from the captured statement below.
7563 Address CapturedStruct
= Address::invalid();
7565 OMPLexicalScope
Scope(*this, S
, OMPD_taskloop
, /*EmitPreInitStmt=*/false);
7566 CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
// Record type of the captured variables; passed on to the taskloop call.
7568 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
// Pick up the condition of an 'if' clause that has no name modifier or the
// 'taskloop' modifier.
7569 const Expr
*IfCond
= nullptr;
7570 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
7571 if (C
->getNameModifier() == OMPD_unknown
||
7572 C
->getNameModifier() == OMPD_taskloop
) {
7573 IfCond
= C
->getCondition();
7579 // Check if taskloop must be emitted without taskgroup.
7580 Data
.Nogroup
= S
.getSingleClause
<OMPNogroupClause
>();
7581 // TODO: Check if we should emit tied or untied task.
7583 // Set scheduling for taskloop
// The Schedule flag distinguishes the clauses (false for 'grainsize', true
// for 'num_tasks'); the pointer holds the emitted clause value.
7584 if (const auto *Clause
= S
.getSingleClause
<OMPGrainsizeClause
>()) {
7586 Data
.Schedule
.setInt(/*IntVal=*/false);
7587 Data
.Schedule
.setPointer(EmitScalarExpr(Clause
->getGrainsize()));
7588 } else if (const auto *Clause
= S
.getSingleClause
<OMPNumTasksClause
>()) {
7590 Data
.Schedule
.setInt(/*IntVal=*/true);
7591 Data
.Schedule
.setPointer(EmitScalarExpr(Clause
->getNumTasks()));
// Generator for the loop body of the task.
7594 auto &&BodyGen
= [CS
, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7596 // for (IV in 0..LastIteration) BODY;
7597 // <Final counter/linear vars updates>;
7601 // Emit: if (PreCond) - begin.
7602 // Try to constant-fold the pre-condition first; when it does not fold, a
// runtime check branching to "taskloop.if.then"/"taskloop.if.end" is emitted.
7605 llvm::BasicBlock
*ContBlock
= nullptr;
7606 OMPLoopScope
PreInitScope(CGF
, S
);
7607 if (CGF
.ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
7611 llvm::BasicBlock
*ThenBlock
= CGF
.createBasicBlock("taskloop.if.then");
7612 ContBlock
= CGF
.createBasicBlock("taskloop.if.end");
7613 emitPreCond(CGF
, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
7614 CGF
.getProfileCount(&S
));
7615 CGF
.EmitBlock(ThenBlock
);
7616 CGF
.incrementProfileCounter(&S
);
7619 (void)CGF
.EmitOMPLinearClauseInit(S
);
7621 OMPPrivateScope
LoopScope(CGF
);
7622 // Emit helper vars inits.
// Offsets of the lower-bound, upper-bound, stride and last-iteration
// parameters in the captured declaration's parameter list (5, 6, 7, 8).
7623 enum { LowerBound
= 5, UpperBound
, Stride
, LastIter
};
7624 auto *I
= CS
->getCapturedDecl()->param_begin();
7625 auto *LBP
= std::next(I
, LowerBound
);
7626 auto *UBP
= std::next(I
, UpperBound
);
7627 auto *STP
= std::next(I
, Stride
);
7628 auto *LIP
= std::next(I
, LastIter
);
// Bind each loop helper variable to its corresponding parameter.
7629 mapParam(CGF
, cast
<DeclRefExpr
>(S
.getLowerBoundVariable()), *LBP
,
7631 mapParam(CGF
, cast
<DeclRefExpr
>(S
.getUpperBoundVariable()), *UBP
,
7633 mapParam(CGF
, cast
<DeclRefExpr
>(S
.getStrideVariable()), *STP
, LoopScope
);
7634 mapParam(CGF
, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()), *LIP
,
// Register private loop counters, linear and lastprivate variables in the
// loop scope, then activate it.
7636 CGF
.EmitOMPPrivateLoopCounters(S
, LoopScope
);
7637 CGF
.EmitOMPLinearClause(S
, LoopScope
);
7638 bool HasLastprivateClause
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
7639 (void)LoopScope
.Privatize();
7640 // Emit the loop iteration variable.
7641 const Expr
*IVExpr
= S
.getIterationVariable();
7642 const auto *IVDecl
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IVExpr
)->getDecl());
7643 CGF
.EmitVarDecl(*IVDecl
);
7644 CGF
.EmitIgnoredExpr(S
.getInit());
7646 // Emit the iterations count variable.
7647 // If it is not a variable, Sema decided to calculate iterations count on
7648 // each iteration (e.g., it is foldable into a constant).
7649 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
7650 CGF
.EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
7651 // Emit calculation of the iterations count.
7652 CGF
.EmitIgnoredExpr(S
.getCalcLastIteration());
7656 OMPLexicalScope
Scope(CGF
, S
, OMPD_taskloop
, /*EmitPreInitStmt=*/false);
// First callback: simd initialization, only for simd directive kinds.
7659 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7660 if (isOpenMPSimdDirective(S
.getDirectiveKind()))
7661 CGF
.EmitOMPSimdInit(S
);
// Second callback: the inner loop, whose body is emitted with a stop point
// and whose post-increment callback is empty.
7663 [&S
, &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7664 CGF
.EmitOMPInnerLoop(
7665 S
, LoopScope
.requiresCleanups(), S
.getCond(), S
.getInc(),
7666 [&S
](CodeGenFunction
&CGF
) {
7667 emitOMPLoopBodyWithStopPoint(CGF
, S
,
7668 CodeGenFunction::JumpDest());
7670 [](CodeGenFunction
&) {});
7673 // Emit: if (PreCond) - end.
7675 CGF
.EmitBranch(ContBlock
);
7676 CGF
.EmitBlock(ContBlock
, true);
7678 // Emit final copy of the lastprivate variables if IsLastIter != 0.
7679 if (HasLastprivateClause
) {
7680 CGF
.EmitOMPLastprivateClauseFinal(
7681 S
, isOpenMPSimdDirective(S
.getDirectiveKind()),
7682 CGF
.Builder
.CreateIsNotNull(CGF
.EmitLoadOfScalar(
7683 CGF
.GetAddrOfLocalVar(*LIP
), /*Volatile=*/false,
7684 (*LIP
)->getType(), S
.getBeginLoc())));
// Restore the original variable mapping before finalizing linear variables;
// the callback again yields the "last-iteration flag is non-null" condition.
7686 LoopScope
.restoreMap();
7687 CGF
.EmitOMPLinearClauseFinal(S
, [LIP
, &S
](CodeGenFunction
&CGF
) {
7688 return CGF
.Builder
.CreateIsNotNull(
7689 CGF
.EmitLoadOfScalar(CGF
.GetAddrOfLocalVar(*LIP
), /*Volatile=*/false,
7690 (*LIP
)->getType(), S
.getBeginLoc()));
// Generator for the runtime taskloop call; it receives the outlined function
// and the task data and emits the call as an inlined 'taskloop' directive.
7693 auto &&TaskGen
= [&S
, SharedsTy
, CapturedStruct
,
7694 IfCond
](CodeGenFunction
&CGF
, llvm::Function
*OutlinedFn
,
7695 const OMPTaskDataTy
&Data
) {
7696 auto &&CodeGen
= [&S
, OutlinedFn
, SharedsTy
, CapturedStruct
, IfCond
,
7697 &Data
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7698 OMPLoopScope
PreInitScope(CGF
, S
);
7699 CGF
.CGM
.getOpenMPRuntime().emitTaskLoopCall(CGF
, S
.getBeginLoc(), S
,
7700 OutlinedFn
, SharedsTy
,
7701 CapturedStruct
, IfCond
, Data
);
7703 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_taskloop
,
// The task is emitted either directly ...
// NOTE(review): the selecting condition is not visible here; presumably it
// is Data.Nogroup -- confirm against the full file.
7707 EmitOMPTaskBasedDirective(S
, OMPD_taskloop
, BodyGen
, TaskGen
, Data
);
// ... or from inside a taskgroup region emitted by the runtime.
7709 CGM
.getOpenMPRuntime().emitTaskgroupRegion(
7711 [&S
, &BodyGen
, &TaskGen
, &Data
](CodeGenFunction
&CGF
,
7712 PrePostActionTy
&Action
) {
7714 CGF
.EmitOMPTaskBasedDirective(S
, OMPD_taskloop
, BodyGen
, TaskGen
,
// Emits code for the 'taskloop' directive by delegating to the shared
// taskloop-based emission.
7721 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective
&S
) {
// Calls LastprivateConditionalRAII::disable for this directive.
// NOTE(review): the variable receiving the result is not visible in this
// excerpt; presumably an RAII guard kept for the function's duration.
7723 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7724 EmitOMPTaskLoopBasedDirective(S
);
// Emits code for the 'taskloop simd' directive: opens a lexical scope for the
// directive and delegates to the shared taskloop-based emission.
7727 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7728 const OMPTaskLoopSimdDirective
&S
) {
// Calls LastprivateConditionalRAII::disable for this directive.
// NOTE(review): the variable receiving the result is not visible in this
// excerpt; presumably an RAII guard kept for the function's duration.
7730 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7731 OMPLexicalScope
Scope(*this, S
);
7732 EmitOMPTaskLoopBasedDirective(S
);
// Emits code for the 'master taskloop' directive: the shared taskloop-based
// emission is wrapped in a master region emitted by the OpenMP runtime.
7735 void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
7736 const OMPMasterTaskLoopDirective
&S
) {
// Body of the master region: the taskloop itself.
7737 auto &&CodeGen
= [this, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7739 EmitOMPTaskLoopBasedDirective(S
);
// Calls LastprivateConditionalRAII::disable for this directive.
// NOTE(review): the variable receiving the result is not visible in this
// excerpt; presumably an RAII guard kept for the function's duration.
7742 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
// Lexical scope with no capture region given and pre-init statements not
// emitted here.
7743 OMPLexicalScope
Scope(*this, S
, std::nullopt
, /*EmitPreInitStmt=*/false);
7744 CGM
.getOpenMPRuntime().emitMasterRegion(*this, CodeGen
, S
.getBeginLoc());
// Emits code for the 'master taskloop simd' directive: the shared
// taskloop-based emission is wrapped in a master region emitted by the OpenMP
// runtime.
7747 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
7748 const OMPMasterTaskLoopSimdDirective
&S
) {
// Body of the master region: the taskloop itself.
7749 auto &&CodeGen
= [this, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7751 EmitOMPTaskLoopBasedDirective(S
);
// Calls LastprivateConditionalRAII::disable for this directive.
// NOTE(review): the variable receiving the result is not visible in this
// excerpt; presumably an RAII guard kept for the function's duration.
7754 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7755 OMPLexicalScope
Scope(*this, S
);
7756 CGM
.getOpenMPRuntime().emitMasterRegion(*this, CodeGen
, S
.getBeginLoc());
// Emits code for the 'parallel master taskloop' directive: a 'parallel'
// region whose body is a master region containing the shared taskloop-based
// emission.
7759 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
7760 const OMPParallelMasterTaskLoopDirective
&S
) {
// Body of the 'parallel' region.
7761 auto &&CodeGen
= [this, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
// Body of the nested master region: the taskloop itself.
7762 auto &&TaskLoopCodeGen
= [&S
](CodeGenFunction
&CGF
,
7763 PrePostActionTy
&Action
) {
7765 CGF
.EmitOMPTaskLoopBasedDirective(S
);
// Scope for the 'parallel' capture region; pre-init statements are not
// emitted here.
7767 OMPLexicalScope
Scope(CGF
, S
, OMPD_parallel
, /*EmitPreInitStmt=*/false);
7768 CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, TaskLoopCodeGen
,
// Calls LastprivateConditionalRAII::disable for this directive.
// NOTE(review): the variable receiving the result is not visible in this
// excerpt; presumably an RAII guard kept for the function's duration.
7772 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7773 emitCommonOMPParallelDirective(*this, S
, OMPD_master_taskloop
, CodeGen
,
7774 emitEmptyBoundParameters
);
// Emits code for the 'parallel master taskloop simd' directive: a 'parallel'
// region whose body is a master region containing the shared taskloop-based
// emission.
7777 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7778 const OMPParallelMasterTaskLoopSimdDirective
&S
) {
// Body of the 'parallel' region.
7779 auto &&CodeGen
= [this, &S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
// Body of the nested master region: the taskloop itself.
7780 auto &&TaskLoopCodeGen
= [&S
](CodeGenFunction
&CGF
,
7781 PrePostActionTy
&Action
) {
7783 CGF
.EmitOMPTaskLoopBasedDirective(S
);
// Scope for the 'parallel' capture region; pre-init statements are not
// emitted here.
7785 OMPLexicalScope
Scope(CGF
, S
, OMPD_parallel
, /*EmitPreInitStmt=*/false);
7786 CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, TaskLoopCodeGen
,
// Calls LastprivateConditionalRAII::disable for this directive.
// NOTE(review): the variable receiving the result is not visible in this
// excerpt; presumably an RAII guard kept for the function's duration.
7790 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
7791 emitCommonOMPParallelDirective(*this, S
, OMPD_master_taskloop_simd
, CodeGen
,
7792 emitEmptyBoundParameters
);
7795 // Generate the instructions for '#pragma omp target update' directive.
// Collects the optional 'if' and 'device' clause expressions (left null when
// the clause is absent) and hands them to the OpenMP runtime's stand-alone
// data call.
7796 void CodeGenFunction::EmitOMPTargetUpdateDirective(
7797 const OMPTargetUpdateDirective
&S
) {
7798 // If we don't have target devices, don't bother emitting the data mapping code.
// NOTE(review): the statement guarded by this check is not visible in this
// excerpt; presumably an early return -- confirm against the full file.
7800 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
7803 // Check if we have any if clause associated with the directive.
7804 const Expr
*IfCond
= nullptr;
7805 if (const auto *C
= S
.getSingleClause
<OMPIfClause
>())
7806 IfCond
= C
->getCondition();
7808 // Check if we have any device clause associated with the directive.
7809 const Expr
*Device
= nullptr;
7810 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
7811 Device
= C
->getDevice();
// Open the directive's lexical scope (OMPD_task is passed as the region kind)
// and let the runtime emit the stand-alone call with the collected clauses.
7813 OMPLexicalScope
Scope(*this, S
, OMPD_task
);
7814 CGM
.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S
, IfCond
, Device
);
// Emits code for the generic 'loop' directive by inlining the associated
// statement through the runtime's inlined-directive emission.
// NOTE(review): some lines of this function are not visible in this excerpt
// (the declaration receiving the captured statement as CS, and the statements
// that emit it); the comments below cover the visible code only.
7817 void CodeGenFunction::EmitOMPGenericLoopDirective(
7818 const OMPGenericLoopDirective
&S
) {
7819 // Unimplemented, just inline the underlying statement for now.
7820 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7821 // Emit the loop iteration variable.
7823 cast
<CapturedStmt
>(S
.getAssociatedStmt())->getCapturedStmt();
// When the captured statement is a 'for' whose init part is not a
// declaration statement, privatize the loop counters in a temporary scope
// and restore the mapping afterwards.
7824 const auto *ForS
= dyn_cast
<ForStmt
>(CS
);
7825 if (ForS
&& !isa
<DeclStmt
>(ForS
->getInit())) {
7826 OMPPrivateScope
LoopScope(CGF
);
7827 CGF
.EmitOMPPrivateLoopCounters(S
, LoopScope
);
7828 (void)LoopScope
.Privatize();
7830 LoopScope
.restoreMap();
// Lexical scope with OMPD_unknown as the region kind, then inline the
// directive as OMPD_loop.
7835 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
7836 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop
, CodeGen
);
7839 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
7840 const OMPExecutableDirective
&D
) {
7841 if (const auto *SD
= dyn_cast
<OMPScanDirective
>(&D
)) {
7842 EmitOMPScanDirective(*SD
);
7845 if (!D
.hasAssociatedStmt() || !D
.getAssociatedStmt())
7847 auto &&CodeGen
= [&D
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7848 OMPPrivateScope
GlobalsScope(CGF
);
7849 if (isOpenMPTaskingDirective(D
.getDirectiveKind())) {
7850 // Capture global firstprivates to avoid crash.
7851 for (const auto *C
: D
.getClausesOfKind
<OMPFirstprivateClause
>()) {
7852 for (const Expr
*Ref
: C
->varlists()) {
7853 const auto *DRE
= cast
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts());
7856 const auto *VD
= dyn_cast
<VarDecl
>(DRE
->getDecl());
7857 if (!VD
|| VD
->hasLocalStorage())
7859 if (!CGF
.LocalDeclMap
.count(VD
)) {
7860 LValue GlobLVal
= CGF
.EmitLValue(Ref
);
7861 GlobalsScope
.addPrivate(VD
, GlobLVal
.getAddress(CGF
));
7866 if (isOpenMPSimdDirective(D
.getDirectiveKind())) {
7867 (void)GlobalsScope
.Privatize();
7868 ParentLoopDirectiveForScanRegion
ScanRegion(CGF
, D
);
7869 emitOMPSimdRegion(CGF
, cast
<OMPLoopDirective
>(D
), Action
);
7871 if (const auto *LD
= dyn_cast
<OMPLoopDirective
>(&D
)) {
7872 for (const Expr
*E
: LD
->counters()) {
7873 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
7874 if (!VD
->hasLocalStorage() && !CGF
.LocalDeclMap
.count(VD
)) {
7875 LValue GlobLVal
= CGF
.EmitLValue(E
);
7876 GlobalsScope
.addPrivate(VD
, GlobLVal
.getAddress(CGF
));
7878 if (isa
<OMPCapturedExprDecl
>(VD
)) {
7879 // Emit only those that were not explicitly referenced in clauses.
7880 if (!CGF
.LocalDeclMap
.count(VD
))
7881 CGF
.EmitVarDecl(*VD
);
7884 for (const auto *C
: D
.getClausesOfKind
<OMPOrderedClause
>()) {
7885 if (!C
->getNumForLoops())
7887 for (unsigned I
= LD
->getLoopsNumber(),
7888 E
= C
->getLoopNumIterations().size();
7890 if (const auto *VD
= dyn_cast
<OMPCapturedExprDecl
>(
7891 cast
<DeclRefExpr
>(C
->getLoopCounter(I
))->getDecl())) {
7892 // Emit only those that were not explicitly referenced in clauses.
7893 if (!CGF
.LocalDeclMap
.count(VD
))
7894 CGF
.EmitVarDecl(*VD
);
7899 (void)GlobalsScope
.Privatize();
7900 CGF
.EmitStmt(D
.getInnermostCapturedStmt()->getCapturedStmt());
7903 if (D
.getDirectiveKind() == OMPD_atomic
||
7904 D
.getDirectiveKind() == OMPD_critical
||
7905 D
.getDirectiveKind() == OMPD_section
||
7906 D
.getDirectiveKind() == OMPD_master
||
7907 D
.getDirectiveKind() == OMPD_masked
) {
7908 EmitStmt(D
.getAssociatedStmt());
7911 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D
);
7912 OMPSimdLexicalScope
Scope(*this, D
);
7913 CGM
.getOpenMPRuntime().emitInlinedDirective(
7915 isOpenMPSimdDirective(D
.getDirectiveKind()) ? OMPD_simd
7916 : D
.getDirectiveKind(),
7919 // Check for outer lastprivate conditional update.
7920 checkForLastprivateConditionalUpdate(*this, D
);