//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

static const VarDecl *getBaseDecl(const Expr *Ref);
namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
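// Illustrative note (not part of the upstream source): for a directive such as
// '#pragma omp parallel if(cond) num_threads(n)', clauses that capture their
// argument expressions carry pre-init declarations; the scope above emits
// those declarations and privatizes captured variables so that the directive
// body observes the correct addresses.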
/// Lexical scope for OpenMP parallel construct, that handles correct codegen
/// for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};
/// Lexical scope for OpenMP teams construct, that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
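// Illustrative note (not part of the upstream source): when a loop directive
// is applied to a C++ range-based for, e.g. '#pragma omp for' over
// 'for (auto &x : vec)', the implicit __range and __end variables must be
// emitted before loop bounds are computed; that is what the doForAllLoops
// callback above takes care of.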
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
} // namespace
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}
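// Illustrative example (not part of the upstream source): for a VLA such as
// 'int a[n][m]', getTypeSizeInChars() returns zero, so the loop above folds
// the variable dimensions into Size (n * m) and the final result is
// Size * sizeof(int).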
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}
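// Illustrative example (not part of the upstream source): a scalar captured
// by copy, e.g. 'int x' used inside '#pragma omp parallel', is first stored
// into a '<name>.casted' uintptr temporary and reloaded as a uintptr value,
// because the runtime passes every capture as a pointer-sized argument.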
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
  return TmpAddr;
}
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}
namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamDecl::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }

    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace()),
            PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
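// Informal summary (not part of the upstream source): when debug info is
// requested, the function emitted above keeps the original parameter types
// for a better debugging experience, and a thin wrapper with the
// uintptr-based signature is emitted separately; the wrapper reloads each
// argument and simply forwards it to the debug version.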
//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
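// Informal sketch of the emitted control flow (not part of the upstream
// source):
//   entry:             br (dest.begin == dest.end), omp.arraycpy.done,
//                                                   omp.arraycpy.body
//   omp.arraycpy.body: phi for src/dest element; CopyGen(dest, src);
//                      advance both pointers; br (next == end), done, body
//   omp.arraycpy.done: fall through to the rest of the function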
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
                Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
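// Illustrative example (not part of the upstream source): for
// '#pragma omp task firstprivate(a)' where 'a' is 'int a[10]', the array
// branch above copies the original storage into the freshly allocated private
// copy (element by element, or with a plain aggregate assign when the
// initializer is trivial); for a scalar, the private VarDecl is emitted
// directly with its copy initializer.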
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
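// Illustrative example (not part of the upstream source): given a
// threadprivate variable 'tp' and '#pragma omp parallel copyin(tp)', each
// non-master thread compares the address of its copy against the master's
// copy and, when they differ, performs the assignment emitted via
// EmitOMPCopy(); the caller is then responsible for emitting the
// synchronizing barrier.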
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(*this),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress(*this));
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress(*this));
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray)
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction =
        isOpenMPWorksharingDirective(D.getDirectiveKind());
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (D.getDirectiveKind()) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for_simd:
    case OMPD_taskyield:
    case OMPD_taskgroup:
    case OMPD_cancellation_point:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
static void
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
    }
  }
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for updated lastprivate conditional.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
    }
  }
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
      CGF, S, PrivateDecls);
}
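// Example: if an enclosing directive carries 'lastprivate(conditional: x)'
// (OpenMP 5.0+), a nested construct that names the scalar 'x' in one of the
// clauses walked above is reported via checkAndEmitLastprivateConditional so
// the outer directive's conditional-lastprivate tracking sees the update.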
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads);
}
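// Example: '#pragma omp parallel num_threads(4) if(use_par)' evaluates both
// expressions above and passes them to emitParallelCall, which forks the
// outlined function via __kmpc_fork_call or falls back to a serialized call
// when the if-clause condition evaluates to false.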
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
           !AA->getAllocator());
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
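// Example:
//   int buf[1024];
//   #pragma omp allocate(buf) allocator(omp_high_bw_mem_alloc)
// attaches OMPAllocateDeclAttr with a non-default allocator, so
// isAllocatableDecl(buf) is true and the variable is routed through the OpenMP
// allocator API instead of a plain alloca.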
static void emitOMPCopyinClause(CodeGenFunction &CGF,
                                const OMPExecutableDirective &S) {
  bool Copyins = CGF.EmitOMPCopyinClause(S);
  if (Copyins) {
    // Emit an implicit barrier to synchronize threads and avoid data races
    // when propagating the master thread's values of threadprivate variables
    // to the local instances of those variables in all other implicit threads.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  }
}
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
}
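// Worked example of the round-up above: Size = 10 bytes with Align = 8 gives
// (10 + 7) / 8 * 8 == 16, so createOMPAlloc requests 16 bytes from the chosen
// allocator; the paired createOMPFree call is pushed as a normal-and-EH
// cleanup so the memory is released on every exit path.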
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
}

std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str().str();
}
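// Example: getNameWithSeparators({"cache", ""}) with the default "."
// separators yields ".cache.", which getAddrOfThreadPrivate above appends to
// the mangled variable name to form the threadprivate cache name.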
void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}

void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  EmitStmt(S.getIfStmt());
}
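// Note: a '#pragma omp metadirective' is resolved to a single directive (or
// statement) variant during Sema; getIfStmt() returns that resolved statement,
// so plain EmitStmt is sufficient here.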
namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

  OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
      delete;
  OMPTransformDirectiveScopeRAII &
  operator=(const OMPTransformDirectiveScopeRAII &) = delete;

public:
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    if (!Scope)
      return;
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}
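// Example: for '#pragma omp for collapse(2)' over a doubly nested loop,
// MaxLevel is 2: the first level walks (possibly compound) statements to the
// outer ForStmt, the second level locates the inner loop, and the innermost
// body is finally emitted by CGF.EmitStmt(S).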
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in
    // the codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
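// Example: with 'reduction(inscan, + : x)' and a '#pragma omp scan
// inclusive(x)' in the loop body, the dispatch/before/after blocks created
// above let the scan directive's codegen order the input and scan phases as
// required by the inclusive or exclusive clause.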
1953 using EmittedClosureTy
= std::pair
<llvm::Function
*, llvm::Value
*>;
1955 /// Emit a captured statement and return the function as well as its captured
1956 /// closure context.
1957 static EmittedClosureTy
emitCapturedStmtFunc(CodeGenFunction
&ParentCGF
,
1958 const CapturedStmt
*S
) {
1959 LValue CapStruct
= ParentCGF
.InitCapturedStruct(*S
);
1960 CodeGenFunction
CGF(ParentCGF
.CGM
, /*suppressNewContext=*/true);
1961 std::unique_ptr
<CodeGenFunction::CGCapturedStmtInfo
> CSI
=
1962 std::make_unique
<CodeGenFunction::CGCapturedStmtInfo
>(*S
);
1963 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, CSI
.get());
1964 llvm::Function
*F
= CGF
.GenerateCapturedStmtFunction(*S
);
1966 return {F
, CapStruct
.getPointer(ParentCGF
)};
1969 /// Emit a call to a previously captured closure.
1970 static llvm::CallInst
*
1971 emitCapturedStmtCall(CodeGenFunction
&ParentCGF
, EmittedClosureTy Cap
,
1972 llvm::ArrayRef
<llvm::Value
*> Args
) {
1973 // Append the closure context to the argument.
1974 SmallVector
<llvm::Value
*> EffectiveArgs
;
1975 EffectiveArgs
.reserve(Args
.size() + 1);
1976 llvm::append_range(EffectiveArgs
, Args
);
1977 EffectiveArgs
.push_back(Cap
.second
);
1979 return ParentCGF
.Builder
.CreateCall(Cap
.first
, EffectiveArgs
);
1982 llvm::CanonicalLoopInfo
*
1983 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt
*S
, int Depth
) {
1984 assert(Depth
== 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1986 // The caller is processing the loop-associated directive processing the \p
1987 // Depth loops nested in \p S. Put the previous pending loop-associated
1988 // directive to the stack. If the current loop-associated directive is a loop
1989 // transformation directive, it will push its generated loops onto the stack
1990 // such that together with the loops left here they form the combined loop
1991 // nest for the parent loop-associated directive.
1992 int ParentExpectedOMPLoopDepth
= ExpectedOMPLoopDepth
;
1993 ExpectedOMPLoopDepth
= Depth
;
1996 assert(OMPLoopNestStack
.size() >= (size_t)Depth
&& "Found too few loops");
1998 // The last added loop is the outermost one.
1999 llvm::CanonicalLoopInfo
*Result
= OMPLoopNestStack
.back();
2001 // Pop the \p Depth loops requested by the call from that stack and restore
2002 // the previous context.
2003 OMPLoopNestStack
.pop_back_n(Depth
);
2004 ExpectedOMPLoopDepth
= ParentExpectedOMPLoopDepth
;
2009 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop
*S
) {
2010 const Stmt
*SyntacticalLoop
= S
->getLoopStmt();
2011 if (!getLangOpts().OpenMPIRBuilder
) {
2012 // Ignore if OpenMPIRBuilder is not enabled.
2013 EmitStmt(SyntacticalLoop
);
2017 LexicalScope
ForScope(*this, S
->getSourceRange());
2019 // Emit init statements. The Distance/LoopVar funcs may reference variable
2020 // declarations they contain.
2021 const Stmt
*BodyStmt
;
2022 if (const auto *For
= dyn_cast
<ForStmt
>(SyntacticalLoop
)) {
2023 if (const Stmt
*InitStmt
= For
->getInit())
2025 BodyStmt
= For
->getBody();
2026 } else if (const auto *RangeFor
=
2027 dyn_cast
<CXXForRangeStmt
>(SyntacticalLoop
)) {
2028 if (const DeclStmt
*RangeStmt
= RangeFor
->getRangeStmt())
2029 EmitStmt(RangeStmt
);
2030 if (const DeclStmt
*BeginStmt
= RangeFor
->getBeginStmt())
2031 EmitStmt(BeginStmt
);
2032 if (const DeclStmt
*EndStmt
= RangeFor
->getEndStmt())
2034 if (const DeclStmt
*LoopVarStmt
= RangeFor
->getLoopVarStmt())
2035 EmitStmt(LoopVarStmt
);
2036 BodyStmt
= RangeFor
->getBody();
2038 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2040 // Emit closure for later use. By-value captures will be captured here.
2041 const CapturedStmt
*DistanceFunc
= S
->getDistanceFunc();
2042 EmittedClosureTy DistanceClosure
= emitCapturedStmtFunc(*this, DistanceFunc
);
2043 const CapturedStmt
*LoopVarFunc
= S
->getLoopVarFunc();
2044 EmittedClosureTy LoopVarClosure
= emitCapturedStmtFunc(*this, LoopVarFunc
);
2046 // Call the distance function to get the number of iterations of the loop to
2048 QualType LogicalTy
= DistanceFunc
->getCapturedDecl()
2051 .getNonReferenceType();
2052 Address CountAddr
= CreateMemTemp(LogicalTy
, ".count.addr");
2053 emitCapturedStmtCall(*this, DistanceClosure
, {CountAddr
.getPointer()});
2054 llvm::Value
*DistVal
= Builder
.CreateLoad(CountAddr
, ".count");
2056 // Emit the loop structure.
2057 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
2058 auto BodyGen
= [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP
,
2059 llvm::Value
*IndVar
) {
2060 Builder
.restoreIP(CodeGenIP
);
2062 // Emit the loop body: Convert the logical iteration number to the loop
2063 // variable and emit the body.
2064 const DeclRefExpr
*LoopVarRef
= S
->getLoopVarRef();
2065 LValue LCVal
= EmitLValue(LoopVarRef
);
2066 Address LoopVarAddress
= LCVal
.getAddress(*this);
2067 emitCapturedStmtCall(*this, LoopVarClosure
,
2068 {LoopVarAddress
.getPointer(), IndVar
});
2070 RunCleanupsScope
BodyScope(*this);
2073 llvm::CanonicalLoopInfo
*CL
=
2074 OMPBuilder
.createCanonicalLoop(Builder
, BodyGen
, DistVal
);
2076 // Finish up the loop.
2077 Builder
.restoreIP(CL
->getAfterIP());
2078 ForScope
.ForceCleanup();
2080 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2081 OMPLoopNestStack
.push_back(CL
);
2084 void CodeGenFunction::EmitOMPInnerLoop(
2085 const OMPExecutableDirective
&S
, bool RequiresCleanup
, const Expr
*LoopCond
,
2086 const Expr
*IncExpr
,
2087 const llvm::function_ref
<void(CodeGenFunction
&)> BodyGen
,
2088 const llvm::function_ref
<void(CodeGenFunction
&)> PostIncGen
) {
2089 auto LoopExit
= getJumpDestInCurrentScope("omp.inner.for.end");
2091 // Start the loop with a block that tests the condition.
2092 auto CondBlock
= createBasicBlock("omp.inner.for.cond");
2093 EmitBlock(CondBlock
);
2094 const SourceRange R
= S
.getSourceRange();
2096 // If attributes are attached, push to the basic block with them.
2097 const auto &OMPED
= cast
<OMPExecutableDirective
>(S
);
2098 const CapturedStmt
*ICS
= OMPED
.getInnermostCapturedStmt();
2099 const Stmt
*SS
= ICS
->getCapturedStmt();
2100 const AttributedStmt
*AS
= dyn_cast_or_null
<AttributedStmt
>(SS
);
2101 OMPLoopNestStack
.clear();
2103 LoopStack
.push(CondBlock
, CGM
.getContext(), CGM
.getCodeGenOpts(),
2104 AS
->getAttrs(), SourceLocToDebugLoc(R
.getBegin()),
2105 SourceLocToDebugLoc(R
.getEnd()));
2107 LoopStack
.push(CondBlock
, SourceLocToDebugLoc(R
.getBegin()),
2108 SourceLocToDebugLoc(R
.getEnd()));
2110 // If there are any cleanups between here and the loop-exit scope,
2111 // create a block to stage a loop exit along.
2112 llvm::BasicBlock
*ExitBlock
= LoopExit
.getBlock();
2113 if (RequiresCleanup
)
2114 ExitBlock
= createBasicBlock("omp.inner.for.cond.cleanup");
2116 llvm::BasicBlock
*LoopBody
= createBasicBlock("omp.inner.for.body");
2119 EmitBranchOnBoolExpr(LoopCond
, LoopBody
, ExitBlock
, getProfileCount(&S
));
2120 if (ExitBlock
!= LoopExit
.getBlock()) {
2121 EmitBlock(ExitBlock
);
2122 EmitBranchThroughCleanup(LoopExit
);
2125 EmitBlock(LoopBody
);
2126 incrementProfileCounter(&S
);
2128 // Create a block for the increment.
2129 JumpDest Continue
= getJumpDestInCurrentScope("omp.inner.for.inc");
2130 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
2134 // Emit "IV = IV + 1" and a back-edge to the condition block.
2135 EmitBlock(Continue
.getBlock());
2136 EmitIgnoredExpr(IncExpr
);
2138 BreakContinueStack
.pop_back();
2139 EmitBranch(CondBlock
);
2141 // Emit the fall-through block.
2142 EmitBlock(LoopExit
.getBlock());
2145 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective
&D
) {
2146 if (!HaveInsertPoint())
2148 // Emit inits for the linear variables.
2149 bool HasLinears
= false;
2150 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2151 for (const Expr
*Init
: C
->inits()) {
2153 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(Init
)->getDecl());
2154 if (const auto *Ref
=
2155 dyn_cast
<DeclRefExpr
>(VD
->getInit()->IgnoreImpCasts())) {
2156 AutoVarEmission Emission
= EmitAutoVarAlloca(*VD
);
2157 const auto *OrigVD
= cast
<VarDecl
>(Ref
->getDecl());
2158 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
2159 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
2160 VD
->getInit()->getType(), VK_LValue
,
2161 VD
->getInit()->getExprLoc());
2164 MakeAddrLValue(Emission
.getAllocatedAddress(), VD
->getType()),
2165 /*capturedByInit=*/false);
2166 EmitAutoVarCleanups(Emission
);
2171 // Emit the linear steps for the linear clauses.
2172 // If a step is not constant, it is pre-calculated before the loop.
2173 if (const auto *CS
= cast_or_null
<BinaryOperator
>(C
->getCalcStep()))
2174 if (const auto *SaveRef
= cast
<DeclRefExpr
>(CS
->getLHS())) {
2175 EmitVarDecl(*cast
<VarDecl
>(SaveRef
->getDecl()));
2176 // Emit calculation of the linear step.
2177 EmitIgnoredExpr(CS
);
2183 void CodeGenFunction::EmitOMPLinearClauseFinal(
2184 const OMPLoopDirective
&D
,
2185 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
2186 if (!HaveInsertPoint())
2188 llvm::BasicBlock
*DoneBB
= nullptr;
2189 // Emit the final values of the linear variables.
2190 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2191 auto IC
= C
->varlist_begin();
2192 for (const Expr
*F
: C
->finals()) {
2194 if (llvm::Value
*Cond
= CondGen(*this)) {
2195 // If the first post-update expression is found, emit conditional
2196 // block if it was requested.
2197 llvm::BasicBlock
*ThenBB
= createBasicBlock(".omp.linear.pu");
2198 DoneBB
= createBasicBlock(".omp.linear.pu.done");
2199 Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
2203 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IC
)->getDecl());
2204 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(OrigVD
),
2205 CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
2206 (*IC
)->getType(), VK_LValue
, (*IC
)->getExprLoc());
2207 Address OrigAddr
= EmitLValue(&DRE
).getAddress(*this);
2208 CodeGenFunction::OMPPrivateScope
VarScope(*this);
2209 VarScope
.addPrivate(OrigVD
, OrigAddr
);
2210 (void)VarScope
.Privatize();
2214 if (const Expr
*PostUpdate
= C
->getPostUpdateExpr())
2215 EmitIgnoredExpr(PostUpdate
);
2218 EmitBlock(DoneBB
, /*IsFinished=*/true);
2221 static void emitAlignedClause(CodeGenFunction
&CGF
,
2222 const OMPExecutableDirective
&D
) {
2223 if (!CGF
.HaveInsertPoint())
2225 for (const auto *Clause
: D
.getClausesOfKind
<OMPAlignedClause
>()) {
2226 llvm::APInt
ClauseAlignment(64, 0);
2227 if (const Expr
*AlignmentExpr
= Clause
->getAlignment()) {
2229 cast
<llvm::ConstantInt
>(CGF
.EmitScalarExpr(AlignmentExpr
));
2230 ClauseAlignment
= AlignmentCI
->getValue();
2232 for (const Expr
*E
: Clause
->varlists()) {
2233 llvm::APInt
Alignment(ClauseAlignment
);
2234 if (Alignment
== 0) {
2235 // OpenMP [2.8.1, Description]
2236 // If no optional parameter is specified, implementation-defined default
2237 // alignments for SIMD instructions on the target platforms are assumed.
2240 .toCharUnitsFromBits(CGF
.getContext().getOpenMPDefaultSimdAlign(
2241 E
->getType()->getPointeeType()))
2244 assert((Alignment
== 0 || Alignment
.isPowerOf2()) &&
2245 "alignment is not power of 2");
2246 if (Alignment
!= 0) {
2247 llvm::Value
*PtrValue
= CGF
.EmitScalarExpr(E
);
2248 CGF
.emitAlignmentAssumption(
2249 PtrValue
, E
, /*No second loc needed*/ SourceLocation(),
2250 llvm::ConstantInt::get(CGF
.getLLVMContext(), Alignment
));
2256 void CodeGenFunction::EmitOMPPrivateLoopCounters(
2257 const OMPLoopDirective
&S
, CodeGenFunction::OMPPrivateScope
&LoopScope
) {
2258 if (!HaveInsertPoint())
2260 auto I
= S
.private_counters().begin();
2261 for (const Expr
*E
: S
.counters()) {
2262 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2263 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*I
)->getDecl());
2264 // Emit var without initialization.
2265 AutoVarEmission VarEmission
= EmitAutoVarAlloca(*PrivateVD
);
2266 EmitAutoVarCleanups(VarEmission
);
2267 LocalDeclMap
.erase(PrivateVD
);
2268 (void)LoopScope
.addPrivate(VD
, VarEmission
.getAllocatedAddress());
2269 if (LocalDeclMap
.count(VD
) || CapturedStmtInfo
->lookup(VD
) ||
2270 VD
->hasGlobalStorage()) {
2271 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(VD
),
2272 LocalDeclMap
.count(VD
) || CapturedStmtInfo
->lookup(VD
),
2273 E
->getType(), VK_LValue
, E
->getExprLoc());
2274 (void)LoopScope
.addPrivate(PrivateVD
, EmitLValue(&DRE
).getAddress(*this));
2276 (void)LoopScope
.addPrivate(PrivateVD
, VarEmission
.getAllocatedAddress());
2280 // Privatize extra loop counters used in loops for ordered(n) clauses.
2281 for (const auto *C
: S
.getClausesOfKind
<OMPOrderedClause
>()) {
2282 if (!C
->getNumForLoops())
2284 for (unsigned I
= S
.getLoopsNumber(), E
= C
->getLoopNumIterations().size();
2286 const auto *DRE
= cast
<DeclRefExpr
>(C
->getLoopCounter(I
));
2287 const auto *VD
= cast
<VarDecl
>(DRE
->getDecl());
2288 // Override only those variables that can be captured to avoid re-emission
2289 // of the variables declared within the loops.
2290 if (DRE
->refersToEnclosingVariableOrCapture()) {
2291 (void)LoopScope
.addPrivate(
2292 VD
, CreateMemTemp(DRE
->getType(), VD
->getName()));
2298 static void emitPreCond(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2299 const Expr
*Cond
, llvm::BasicBlock
*TrueBlock
,
2300 llvm::BasicBlock
*FalseBlock
, uint64_t TrueCount
) {
2301 if (!CGF
.HaveInsertPoint())
2304 CodeGenFunction::OMPPrivateScope
PreCondScope(CGF
);
2305 CGF
.EmitOMPPrivateLoopCounters(S
, PreCondScope
);
2306 (void)PreCondScope
.Privatize();
2307 // Get initial values of real counters.
2308 for (const Expr
*I
: S
.inits()) {
2309 CGF
.EmitIgnoredExpr(I
);
2312 // Create temp loop control variables with their init values to support
2313 // non-rectangular loops.
2314 CodeGenFunction::OMPMapVars PreCondVars
;
2315 for (const Expr
*E
: S
.dependent_counters()) {
2318 assert(!E
->getType().getNonReferenceType()->isRecordType() &&
2319 "dependent counter must not be an iterator.");
2320 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2321 Address CounterAddr
=
2322 CGF
.CreateMemTemp(VD
->getType().getNonReferenceType());
2323 (void)PreCondVars
.setVarAddr(CGF
, VD
, CounterAddr
);
2325 (void)PreCondVars
.apply(CGF
);
2326 for (const Expr
*E
: S
.dependent_inits()) {
2329 CGF
.EmitIgnoredExpr(E
);
2331 // Check that loop is executed at least one time.
2332 CGF
.EmitBranchOnBoolExpr(Cond
, TrueBlock
, FalseBlock
, TrueCount
);
2333 PreCondVars
.restore(CGF
);
2336 void CodeGenFunction::EmitOMPLinearClause(
2337 const OMPLoopDirective
&D
, CodeGenFunction::OMPPrivateScope
&PrivateScope
) {
2338 if (!HaveInsertPoint())
2340 llvm::DenseSet
<const VarDecl
*> SIMDLCVs
;
2341 if (isOpenMPSimdDirective(D
.getDirectiveKind())) {
2342 const auto *LoopDirective
= cast
<OMPLoopDirective
>(&D
);
2343 for (const Expr
*C
: LoopDirective
->counters()) {
2345 cast
<VarDecl
>(cast
<DeclRefExpr
>(C
)->getDecl())->getCanonicalDecl());
2348 for (const auto *C
: D
.getClausesOfKind
<OMPLinearClause
>()) {
2349 auto CurPrivate
= C
->privates().begin();
2350 for (const Expr
*E
: C
->varlists()) {
2351 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
2352 const auto *PrivateVD
=
2353 cast
<VarDecl
>(cast
<DeclRefExpr
>(*CurPrivate
)->getDecl());
2354 if (!SIMDLCVs
.count(VD
->getCanonicalDecl())) {
2355 // Emit private VarDecl with copy init.
2356 EmitVarDecl(*PrivateVD
);
2358 PrivateScope
.addPrivate(VD
, GetAddrOfLocalVar(PrivateVD
));
2359 assert(IsRegistered
&& "linear var already registered as private");
2360 // Silence the warning about unused variable.
2363 EmitVarDecl(*PrivateVD
);
2370 static void emitSimdlenSafelenClause(CodeGenFunction
&CGF
,
2371 const OMPExecutableDirective
&D
) {
2372 if (!CGF
.HaveInsertPoint())
2374 if (const auto *C
= D
.getSingleClause
<OMPSimdlenClause
>()) {
2375 RValue Len
= CGF
.EmitAnyExpr(C
->getSimdlen(), AggValueSlot::ignored(),
2376 /*ignoreResult=*/true);
2377 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2378 CGF
.LoopStack
.setVectorizeWidth(Val
->getZExtValue());
2379 // In presence of finite 'safelen', it may be unsafe to mark all
2380 // the memory instructions parallel, because loop-carried
2381 // dependences of 'safelen' iterations are possible.
2382 CGF
.LoopStack
.setParallel(!D
.getSingleClause
<OMPSafelenClause
>());
2383 } else if (const auto *C
= D
.getSingleClause
<OMPSafelenClause
>()) {
2384 RValue Len
= CGF
.EmitAnyExpr(C
->getSafelen(), AggValueSlot::ignored(),
2385 /*ignoreResult=*/true);
2386 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2387 CGF
.LoopStack
.setVectorizeWidth(Val
->getZExtValue());
2388 // In presence of finite 'safelen', it may be unsafe to mark all
2389 // the memory instructions parallel, because loop-carried
2390 // dependences of 'safelen' iterations are possible.
2391 CGF
.LoopStack
.setParallel(/*Enable=*/false);
2395 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
&D
) {
2396 // Walk clauses and process safelen/lastprivate.
2397 LoopStack
.setParallel(/*Enable=*/true);
2398 LoopStack
.setVectorizeEnable();
2399 emitSimdlenSafelenClause(*this, D
);
2400 if (const auto *C
= D
.getSingleClause
<OMPOrderClause
>())
2401 if (C
->getKind() == OMPC_ORDER_concurrent
)
2402 LoopStack
.setParallel(/*Enable=*/true);
2403 if ((D
.getDirectiveKind() == OMPD_simd
||
2404 (getLangOpts().OpenMPSimd
&&
2405 isOpenMPSimdDirective(D
.getDirectiveKind()))) &&
2406 llvm::any_of(D
.getClausesOfKind
<OMPReductionClause
>(),
2407 [](const OMPReductionClause
*C
) {
2408 return C
->getModifier() == OMPC_REDUCTION_inscan
;
2410 // Disable parallel access in case of prefix sum.
2411 LoopStack
.setParallel(/*Enable=*/false);
2414 void CodeGenFunction::EmitOMPSimdFinal(
2415 const OMPLoopDirective
&D
,
2416 const llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> CondGen
) {
2417 if (!HaveInsertPoint())
2419 llvm::BasicBlock
*DoneBB
= nullptr;
2420 auto IC
= D
.counters().begin();
2421 auto IPC
= D
.private_counters().begin();
2422 for (const Expr
*F
: D
.finals()) {
2423 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>((*IC
))->getDecl());
2424 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>((*IPC
))->getDecl());
2425 const auto *CED
= dyn_cast
<OMPCapturedExprDecl
>(OrigVD
);
2426 if (LocalDeclMap
.count(OrigVD
) || CapturedStmtInfo
->lookup(OrigVD
) ||
2427 OrigVD
->hasGlobalStorage() || CED
) {
2429 if (llvm::Value
*Cond
= CondGen(*this)) {
2430 // If the first post-update expression is found, emit conditional
2431 // block if it was requested.
2432 llvm::BasicBlock
*ThenBB
= createBasicBlock(".omp.final.then");
2433 DoneBB
= createBasicBlock(".omp.final.done");
2434 Builder
.CreateCondBr(Cond
, ThenBB
, DoneBB
);
2438 Address OrigAddr
= Address::invalid();
2441 EmitLValue(CED
->getInit()->IgnoreImpCasts()).getAddress(*this);
2443 DeclRefExpr
DRE(getContext(), const_cast<VarDecl
*>(PrivateVD
),
2444 /*RefersToEnclosingVariableOrCapture=*/false,
2445 (*IPC
)->getType(), VK_LValue
, (*IPC
)->getExprLoc());
2446 OrigAddr
= EmitLValue(&DRE
).getAddress(*this);
2448 OMPPrivateScope
VarScope(*this);
2449 VarScope
.addPrivate(OrigVD
, OrigAddr
);
2450 (void)VarScope
.Privatize();
2457 EmitBlock(DoneBB
, /*IsFinished=*/true);
2460 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction
&CGF
,
2461 const OMPLoopDirective
&S
,
2462 CodeGenFunction::JumpDest LoopExit
) {
2463 CGF
.EmitOMPLoopBody(S
, LoopExit
);
2464 CGF
.EmitStopPoint(&S
);
2467 /// Emit a helper variable and return corresponding lvalue.
2468 static LValue
EmitOMPHelperVar(CodeGenFunction
&CGF
,
2469 const DeclRefExpr
*Helper
) {
2470 auto VDecl
= cast
<VarDecl
>(Helper
->getDecl());
2471 CGF
.EmitVarDecl(*VDecl
);
2472 return CGF
.EmitLValue(Helper
);
2475 static void emitCommonSimdLoop(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2476 const RegionCodeGenTy
&SimdInitGen
,
2477 const RegionCodeGenTy
&BodyCodeGen
) {
2478 auto &&ThenGen
= [&S
, &SimdInitGen
, &BodyCodeGen
](CodeGenFunction
&CGF
,
2479 PrePostActionTy
&) {
2480 CGOpenMPRuntime::NontemporalDeclsRAII
NontemporalsRegion(CGF
.CGM
, S
);
2481 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
2486 auto &&ElseGen
= [&BodyCodeGen
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2487 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
2488 CGF
.LoopStack
.setVectorizeEnable(/*Enable=*/false);
2492 const Expr
*IfCond
= nullptr;
2493 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
2494 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
2495 if (CGF
.getLangOpts().OpenMP
>= 50 &&
2496 (C
->getNameModifier() == OMPD_unknown
||
2497 C
->getNameModifier() == OMPD_simd
)) {
2498 IfCond
= C
->getCondition();
2504 CGF
.CGM
.getOpenMPRuntime().emitIfClause(CGF
, IfCond
, ThenGen
, ElseGen
);
2506 RegionCodeGenTy
ThenRCG(ThenGen
);
2511 static void emitOMPSimdRegion(CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
2512 PrePostActionTy
&Action
) {
2514 assert(isOpenMPSimdDirective(S
.getDirectiveKind()) &&
2515 "Expected simd directive");
2516 OMPLoopScope
PreInitScope(CGF
, S
);
2518 // for (IV in 0..LastIteration) BODY;
2519 // <Final counter/linear vars updates>;
2522 if (isOpenMPDistributeDirective(S
.getDirectiveKind()) ||
2523 isOpenMPWorksharingDirective(S
.getDirectiveKind()) ||
2524 isOpenMPTaskLoopDirective(S
.getDirectiveKind())) {
2525 (void)EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(S
.getLowerBoundVariable()));
2526 (void)EmitOMPHelperVar(CGF
, cast
<DeclRefExpr
>(S
.getUpperBoundVariable()));
2529 // Emit: if (PreCond) - begin.
2530 // If the condition constant folds and can be elided, avoid emitting the
2533 llvm::BasicBlock
*ContBlock
= nullptr;
2534 if (CGF
.ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
2538 llvm::BasicBlock
*ThenBlock
= CGF
.createBasicBlock("simd.if.then");
2539 ContBlock
= CGF
.createBasicBlock("simd.if.end");
2540 emitPreCond(CGF
, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
2541 CGF
.getProfileCount(&S
));
2542 CGF
.EmitBlock(ThenBlock
);
2543 CGF
.incrementProfileCounter(&S
);
2546 // Emit the loop iteration variable.
2547 const Expr
*IVExpr
= S
.getIterationVariable();
2548 const auto *IVDecl
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IVExpr
)->getDecl());
2549 CGF
.EmitVarDecl(*IVDecl
);
2550 CGF
.EmitIgnoredExpr(S
.getInit());
2552 // Emit the iterations count variable.
2553 // If it is not a variable, Sema decided to calculate iterations count on
2554 // each iteration (e.g., it is foldable into a constant).
2555 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
2556 CGF
.EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
2557 // Emit calculation of the iterations count.
2558 CGF
.EmitIgnoredExpr(S
.getCalcLastIteration());
2561 emitAlignedClause(CGF
, S
);
2562 (void)CGF
.EmitOMPLinearClauseInit(S
);
2564 CodeGenFunction::OMPPrivateScope
LoopScope(CGF
);
2565 CGF
.EmitOMPPrivateLoopCounters(S
, LoopScope
);
2566 CGF
.EmitOMPLinearClause(S
, LoopScope
);
2567 CGF
.EmitOMPPrivateClause(S
, LoopScope
);
2568 CGF
.EmitOMPReductionClauseInit(S
, LoopScope
);
2569 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(
2570 CGF
, S
, CGF
.EmitLValue(S
.getIterationVariable()));
2571 bool HasLastprivateClause
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
2572 (void)LoopScope
.Privatize();
2573 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
2574 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
2578 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2579 CGF
.EmitOMPSimdInit(S
);
2581 [&S
, &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2582 CGF
.EmitOMPInnerLoop(
2583 S
, LoopScope
.requiresCleanups(), S
.getCond(), S
.getInc(),
2584 [&S
](CodeGenFunction
&CGF
) {
2585 emitOMPLoopBodyWithStopPoint(CGF
, S
,
2586 CodeGenFunction::JumpDest());
2588 [](CodeGenFunction
&) {});
2590 CGF
.EmitOMPSimdFinal(S
, [](CodeGenFunction
&) { return nullptr; });
2591 // Emit final copy of the lastprivate variables at the end of loops.
2592 if (HasLastprivateClause
)
2593 CGF
.EmitOMPLastprivateClauseFinal(S
, /*NoFinals=*/true);
2594 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_simd
);
2595 emitPostUpdateForReductionClause(CGF
, S
,
2596 [](CodeGenFunction
&) { return nullptr; });
2597 LoopScope
.restoreMap();
2598 CGF
.EmitOMPLinearClauseFinal(S
, [](CodeGenFunction
&) { return nullptr; });
2600 // Emit: if (PreCond) - end.
2602 CGF
.EmitBranch(ContBlock
);
2603 CGF
.EmitBlock(ContBlock
, true);
2607 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective
&S
) {
2608 // Check for unsupported clauses
2609 for (OMPClause
*C
: S
.clauses()) {
2610 // Currently only order, simdlen and safelen clauses are supported
2611 if (!(isa
<OMPSimdlenClause
>(C
) || isa
<OMPSafelenClause
>(C
) ||
2612 isa
<OMPOrderClause
>(C
) || isa
<OMPAlignedClause
>(C
)))
2616 // Check if we have a statement with the ordered directive.
2617 // Visit the statement hierarchy to find a compound statement
2618 // with a ordered directive in it.
2619 if (const auto *CanonLoop
= dyn_cast
<OMPCanonicalLoop
>(S
.getRawStmt())) {
2620 if (const Stmt
*SyntacticalLoop
= CanonLoop
->getLoopStmt()) {
2621 for (const Stmt
*SubStmt
: SyntacticalLoop
->children()) {
2624 if (const CompoundStmt
*CS
= dyn_cast
<CompoundStmt
>(SubStmt
)) {
2625 for (const Stmt
*CSSubStmt
: CS
->children()) {
2628 if (isa
<OMPOrderedDirective
>(CSSubStmt
)) {
2638 static llvm::MapVector
<llvm::Value
*, llvm::Value
*>
2639 GetAlignedMapping(const OMPSimdDirective
&S
, CodeGenFunction
&CGF
) {
2640 llvm::MapVector
<llvm::Value
*, llvm::Value
*> AlignedVars
;
2641 for (const auto *Clause
: S
.getClausesOfKind
<OMPAlignedClause
>()) {
2642 llvm::APInt
ClauseAlignment(64, 0);
2643 if (const Expr
*AlignmentExpr
= Clause
->getAlignment()) {
2645 cast
<llvm::ConstantInt
>(CGF
.EmitScalarExpr(AlignmentExpr
));
2646 ClauseAlignment
= AlignmentCI
->getValue();
2648 for (const Expr
*E
: Clause
->varlists()) {
2649 llvm::APInt
Alignment(ClauseAlignment
);
2650 if (Alignment
== 0) {
2651 // OpenMP [2.8.1, Description]
2652 // If no optional parameter is specified, implementation-defined default
2653 // alignments for SIMD instructions on the target platforms are assumed.
2656 .toCharUnitsFromBits(CGF
.getContext().getOpenMPDefaultSimdAlign(
2657 E
->getType()->getPointeeType()))
2660 assert((Alignment
== 0 || Alignment
.isPowerOf2()) &&
2661 "alignment is not power of 2");
2662 llvm::Value
*PtrValue
= CGF
.EmitScalarExpr(E
);
2663 AlignedVars
[PtrValue
] = CGF
.Builder
.getInt64(Alignment
.getSExtValue());
2669 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective
&S
) {
2670 bool UseOMPIRBuilder
=
2671 CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
);
2672 if (UseOMPIRBuilder
) {
2673 auto &&CodeGenIRBuilder
= [this, &S
, UseOMPIRBuilder
](CodeGenFunction
&CGF
,
2674 PrePostActionTy
&) {
2675 // Use the OpenMPIRBuilder if enabled.
2676 if (UseOMPIRBuilder
) {
2677 llvm::MapVector
<llvm::Value
*, llvm::Value
*> AlignedVars
=
2678 GetAlignedMapping(S
, CGF
);
2679 // Emit the associated statement and get its loop representation.
2680 const Stmt
*Inner
= S
.getRawStmt();
2681 llvm::CanonicalLoopInfo
*CLI
=
2682 EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
2684 llvm::OpenMPIRBuilder
&OMPBuilder
=
2685 CGM
.getOpenMPRuntime().getOMPBuilder();
2686 // Add SIMD specific metadata
2687 llvm::ConstantInt
*Simdlen
= nullptr;
2688 if (const auto *C
= S
.getSingleClause
<OMPSimdlenClause
>()) {
2690 this->EmitAnyExpr(C
->getSimdlen(), AggValueSlot::ignored(),
2691 /*ignoreResult=*/true);
2692 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2695 llvm::ConstantInt
*Safelen
= nullptr;
2696 if (const auto *C
= S
.getSingleClause
<OMPSafelenClause
>()) {
2698 this->EmitAnyExpr(C
->getSafelen(), AggValueSlot::ignored(),
2699 /*ignoreResult=*/true);
2700 auto *Val
= cast
<llvm::ConstantInt
>(Len
.getScalarVal());
2703 llvm::omp::OrderKind Order
= llvm::omp::OrderKind::OMP_ORDER_unknown
;
2704 if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>()) {
2705 if (C
->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent
) {
2706 Order
= llvm::omp::OrderKind::OMP_ORDER_concurrent
;
2709 // Add simd metadata to the collapsed loop. Do not generate
2710 // another loop for if clause. Support for if clause is done earlier.
2711 OMPBuilder
.applySimd(CLI
, AlignedVars
,
2712 /*IfCond*/ nullptr, Order
, Simdlen
, Safelen
);
2718 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
2719 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
2720 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
,
2726 ParentLoopDirectiveForScanRegion
ScanRegion(*this, S
);
2727 OMPFirstScanLoop
= true;
2728 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
2729 emitOMPSimdRegion(CGF
, S
, Action
);
2733 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
2734 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
2735 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
2737 // Check for outer lastprivate conditional update.
2738 checkForLastprivateConditionalUpdate(*this, S
);
2741 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective
&S
) {
2742 // Emit the de-sugared statement.
2743 OMPTransformDirectiveScopeRAII
TileScope(*this, &S
);
2744 EmitStmt(S
.getTransformedStmt());
2747 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective
&S
) {
2748 bool UseOMPIRBuilder
= CGM
.getLangOpts().OpenMPIRBuilder
;
2750 if (UseOMPIRBuilder
) {
2751 auto DL
= SourceLocToDebugLoc(S
.getBeginLoc());
2752 const Stmt
*Inner
= S
.getRawStmt();
2754 // Consume nested loop. Clear the entire remaining loop stack because a
2755 // fully unrolled loop is non-transformable. For partial unrolling the
2756 // generated outer loop is pushed back to the stack.
2757 llvm::CanonicalLoopInfo
*CLI
= EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
2758 OMPLoopNestStack
.clear();
2760 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
2762 bool NeedsUnrolledCLI
= ExpectedOMPLoopDepth
>= 1;
2763 llvm::CanonicalLoopInfo
*UnrolledCLI
= nullptr;
2765 if (S
.hasClausesOfKind
<OMPFullClause
>()) {
2766 assert(ExpectedOMPLoopDepth
== 0);
2767 OMPBuilder
.unrollLoopFull(DL
, CLI
);
2768 } else if (auto *PartialClause
= S
.getSingleClause
<OMPPartialClause
>()) {
2769 uint64_t Factor
= 0;
2770 if (Expr
*FactorExpr
= PartialClause
->getFactor()) {
2771 Factor
= FactorExpr
->EvaluateKnownConstInt(getContext()).getZExtValue();
2772 assert(Factor
>= 1 && "Only positive factors are valid");
2774 OMPBuilder
.unrollLoopPartial(DL
, CLI
, Factor
,
2775 NeedsUnrolledCLI
? &UnrolledCLI
: nullptr);
2777 OMPBuilder
.unrollLoopHeuristic(DL
, CLI
);
2780 assert((!NeedsUnrolledCLI
|| UnrolledCLI
) &&
2781 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2783 OMPLoopNestStack
.push_back(UnrolledCLI
);
2788 // This function is only called if the unrolled loop is not consumed by any
2789 // other loop-associated construct. Such a loop-associated construct will have
2790 // used the transformed AST.
2792 // Set the unroll metadata for the next emitted loop.
2793 LoopStack
.setUnrollState(LoopAttributes::Enable
);
2795 if (S
.hasClausesOfKind
<OMPFullClause
>()) {
2796 LoopStack
.setUnrollState(LoopAttributes::Full
);
2797 } else if (auto *PartialClause
= S
.getSingleClause
<OMPPartialClause
>()) {
2798 if (Expr
*FactorExpr
= PartialClause
->getFactor()) {
2800 FactorExpr
->EvaluateKnownConstInt(getContext()).getZExtValue();
2801 assert(Factor
>= 1 && "Only positive factors are valid");
2802 LoopStack
.setUnrollCount(Factor
);
2806 EmitStmt(S
.getAssociatedStmt());
2809 void CodeGenFunction::EmitOMPOuterLoop(
2810 bool DynamicOrOrdered
, bool IsMonotonic
, const OMPLoopDirective
&S
,
2811 CodeGenFunction::OMPPrivateScope
&LoopScope
,
2812 const CodeGenFunction::OMPLoopArguments
&LoopArgs
,
2813 const CodeGenFunction::CodeGenLoopTy
&CodeGenLoop
,
2814 const CodeGenFunction::CodeGenOrderedTy
&CodeGenOrdered
) {
2815 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
2817 const Expr
*IVExpr
= S
.getIterationVariable();
2818 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
2819 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
2821 JumpDest LoopExit
= getJumpDestInCurrentScope("omp.dispatch.end");
2823 // Start the loop with a block that tests the condition.
2824 llvm::BasicBlock
*CondBlock
= createBasicBlock("omp.dispatch.cond");
2825 EmitBlock(CondBlock
);
2826 const SourceRange R
= S
.getSourceRange();
2827 OMPLoopNestStack
.clear();
2828 LoopStack
.push(CondBlock
, SourceLocToDebugLoc(R
.getBegin()),
2829 SourceLocToDebugLoc(R
.getEnd()));
2831 llvm::Value
*BoolCondVal
= nullptr;
2832 if (!DynamicOrOrdered
) {
2833 // UB = min(UB, GlobalUB) or
2834 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2835 // 'distribute parallel for')
2836 EmitIgnoredExpr(LoopArgs
.EUB
);
2838 EmitIgnoredExpr(LoopArgs
.Init
);
2840 BoolCondVal
= EvaluateExprAsBool(LoopArgs
.Cond
);
2843 RT
.emitForNext(*this, S
.getBeginLoc(), IVSize
, IVSigned
, LoopArgs
.IL
,
2844 LoopArgs
.LB
, LoopArgs
.UB
, LoopArgs
.ST
);
2847 // If there are any cleanups between here and the loop-exit scope,
2848 // create a block to stage a loop exit along.
2849 llvm::BasicBlock
*ExitBlock
= LoopExit
.getBlock();
2850 if (LoopScope
.requiresCleanups())
2851 ExitBlock
= createBasicBlock("omp.dispatch.cleanup");
2853 llvm::BasicBlock
*LoopBody
= createBasicBlock("omp.dispatch.body");
2854 Builder
.CreateCondBr(BoolCondVal
, LoopBody
, ExitBlock
);
2855 if (ExitBlock
!= LoopExit
.getBlock()) {
2856 EmitBlock(ExitBlock
);
2857 EmitBranchThroughCleanup(LoopExit
);
2859 EmitBlock(LoopBody
);
2861 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2862 // LB for loop condition and emitted it above).
2863 if (DynamicOrOrdered
)
2864 EmitIgnoredExpr(LoopArgs
.Init
);
2866 // Create a block for the increment.
2867 JumpDest Continue
= getJumpDestInCurrentScope("omp.dispatch.inc");
2868 BreakContinueStack
.push_back(BreakContinue(LoopExit
, Continue
));
2872 [&S
, IsMonotonic
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2873 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2874 // with dynamic/guided scheduling and without ordered clause.
2875 if (!isOpenMPSimdDirective(S
.getDirectiveKind())) {
2876 CGF
.LoopStack
.setParallel(!IsMonotonic
);
2877 if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>())
2878 if (C
->getKind() == OMPC_ORDER_concurrent
)
2879 CGF
.LoopStack
.setParallel(/*Enable=*/true);
2881 CGF
.EmitOMPSimdInit(S
);
2884 [&S
, &LoopArgs
, LoopExit
, &CodeGenLoop
, IVSize
, IVSigned
, &CodeGenOrdered
,
2885 &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
2886 SourceLocation Loc
= S
.getBeginLoc();
2887 // when 'distribute' is not combined with a 'for':
2888 // while (idx <= UB) { BODY; ++idx; }
2889 // when 'distribute' is combined with a 'for'
2890 // (e.g. 'distribute parallel for')
2891 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2892 CGF
.EmitOMPInnerLoop(
2893 S
, LoopScope
.requiresCleanups(), LoopArgs
.Cond
, LoopArgs
.IncExpr
,
2894 [&S
, LoopExit
, &CodeGenLoop
](CodeGenFunction
&CGF
) {
2895 CodeGenLoop(CGF
, S
, LoopExit
);
2897 [IVSize
, IVSigned
, Loc
, &CodeGenOrdered
](CodeGenFunction
&CGF
) {
2898 CodeGenOrdered(CGF
, Loc
, IVSize
, IVSigned
);
2902 EmitBlock(Continue
.getBlock());
2903 BreakContinueStack
.pop_back();
2904 if (!DynamicOrOrdered
) {
2905 // Emit "LB = LB + Stride", "UB = UB + Stride".
2906 EmitIgnoredExpr(LoopArgs
.NextLB
);
2907 EmitIgnoredExpr(LoopArgs
.NextUB
);
2910 EmitBranch(CondBlock
);
2911 OMPLoopNestStack
.clear();
2913 // Emit the fall-through block.
2914 EmitBlock(LoopExit
.getBlock());
2916 // Tell the runtime we are done.
2917 auto &&CodeGen
= [DynamicOrOrdered
, &S
](CodeGenFunction
&CGF
) {
2918 if (!DynamicOrOrdered
)
2919 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
2920 S
.getDirectiveKind());
2922 OMPCancelStack
.emitExit(*this, S
.getDirectiveKind(), CodeGen
);
2925 void CodeGenFunction::EmitOMPForOuterLoop(
2926 const OpenMPScheduleTy
&ScheduleKind
, bool IsMonotonic
,
2927 const OMPLoopDirective
&S
, OMPPrivateScope
&LoopScope
, bool Ordered
,
2928 const OMPLoopArguments
&LoopArgs
,
2929 const CodeGenDispatchBoundsTy
&CGDispatchBounds
) {
2930 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
2932 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2933 const bool DynamicOrOrdered
= Ordered
|| RT
.isDynamic(ScheduleKind
.Schedule
);
2935 assert((Ordered
|| !RT
.isStaticNonchunked(ScheduleKind
.Schedule
,
2936 LoopArgs
.Chunk
!= nullptr)) &&
2937 "static non-chunked schedule does not need outer loop");
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
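  // For instance, 'schedule(dynamic, 4)' takes the dynamic path below:
  // emitForDispatchInit lowers to __kmpc_dispatch_init_4/8[u] with chunk 4,
  // and the generated outer loop keeps calling __kmpc_dispatch_next_4/8[u]
  // until it reports no more chunks, matching the pseudo-code above.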
2989 const Expr
*IVExpr
= S
.getIterationVariable();
2990 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
2991 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
2993 if (DynamicOrOrdered
) {
2994 const std::pair
<llvm::Value
*, llvm::Value
*> DispatchBounds
=
2995 CGDispatchBounds(*this, S
, LoopArgs
.LB
, LoopArgs
.UB
);
2996 llvm::Value
*LBVal
= DispatchBounds
.first
;
2997 llvm::Value
*UBVal
= DispatchBounds
.second
;
2998 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues
= {LBVal
, UBVal
,
3000 RT
.emitForDispatchInit(*this, S
.getBeginLoc(), ScheduleKind
, IVSize
,
3001 IVSigned
, Ordered
, DipatchRTInputValues
);
3003 CGOpenMPRuntime::StaticRTInput
StaticInit(
3004 IVSize
, IVSigned
, Ordered
, LoopArgs
.IL
, LoopArgs
.LB
, LoopArgs
.UB
,
3005 LoopArgs
.ST
, LoopArgs
.Chunk
);
3006 RT
.emitForStaticInit(*this, S
.getBeginLoc(), S
.getDirectiveKind(),
3007 ScheduleKind
, StaticInit
);
3010 auto &&CodeGenOrdered
= [Ordered
](CodeGenFunction
&CGF
, SourceLocation Loc
,
3011 const unsigned IVSize
,
3012 const bool IVSigned
) {
3014 CGF
.CGM
.getOpenMPRuntime().emitForOrderedIterationEnd(CGF
, Loc
, IVSize
,
3019 OMPLoopArguments
OuterLoopArgs(LoopArgs
.LB
, LoopArgs
.UB
, LoopArgs
.ST
,
3020 LoopArgs
.IL
, LoopArgs
.Chunk
, LoopArgs
.EUB
);
3021 OuterLoopArgs
.IncExpr
= S
.getInc();
3022 OuterLoopArgs
.Init
= S
.getInit();
3023 OuterLoopArgs
.Cond
= S
.getCond();
3024 OuterLoopArgs
.NextLB
= S
.getNextLowerBound();
3025 OuterLoopArgs
.NextUB
= S
.getNextUpperBound();
3026 EmitOMPOuterLoop(DynamicOrOrdered
, IsMonotonic
, S
, LoopScope
, OuterLoopArgs
,
3027 emitOMPLoopBodyWithStopPoint
, CodeGenOrdered
);
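// Illustrative note (not from the original source): a source loop that takes
// the DynamicOrOrdered path above would look roughly like the sketch below;
// the schedule kind and chunk size are arbitrary examples.
//
//   #pragma omp for schedule(dynamic, 4)
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// For such a loop, emitForDispatchInit sets up the runtime dispatch and the
// outer loop emitted by EmitOMPOuterLoop repeatedly requests the next
// [LB, UB] chunk before running the inner loop over it, matching the
// __kmpc_dispatch_next pseudocode in the comment above.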
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for' the increment expression of distribute
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}
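// Illustrative note (not from the original source): the Combined* accessors
// above are used for bound-sharing directives such as
// 'distribute parallel for', where 'distribute' chunks the team's iteration
// space and the inner 'for' further divides each chunk. A standalone
//
//   #pragma omp distribute
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// uses the plain Init/Cond/NextLB/NextUB expressions instead.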
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}
static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
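// Illustrative example (not from the original source): for a combined
// directive such as
//
//   #pragma omp distribute parallel for
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// the lambda above emits the inner 'parallel for' worksharing loop over the
// [PrevLB, PrevUB] chunk assigned by the enclosing 'distribute', using the
// inner-bounds and dispatch-bounds helpers defined earlier in this file.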
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
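// Illustrative example (not from the original source): both entry points
// above handle a construct of the form
//
//   #pragma omp target simd
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// EmitOMPTargetSimdDeviceFunction emits the outlined device function for the
// offload target, while EmitOMPTargetSimdDirective emits the host-side code
// that launches the target region.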
namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
3267 bool CodeGenFunction::EmitOMPWorksharingLoop(
3268 const OMPLoopDirective
&S
, Expr
*EUB
,
3269 const CodeGenLoopBoundsTy
&CodeGenLoopBounds
,
3270 const CodeGenDispatchBoundsTy
&CGDispatchBounds
) {
3271 // Emit the loop iteration variable.
3272 const auto *IVExpr
= cast
<DeclRefExpr
>(S
.getIterationVariable());
3273 const auto *IVDecl
= cast
<VarDecl
>(IVExpr
->getDecl());
3274 EmitVarDecl(*IVDecl
);
3276 // Emit the iterations count variable.
3277 // If it is not a variable, Sema decided to calculate iterations count on each
3278 // iteration (e.g., it is foldable into a constant).
3279 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
3280 EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
3281 // Emit calculation of the iterations count.
3282 EmitIgnoredExpr(S
.getCalcLastIteration());
3285 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
3287 bool HasLastprivateClause
;
3288 // Check pre-condition.
3290 OMPLoopScope
PreInitScope(*this, S
);
3291 // Skip the entire loop if we don't meet the precondition.
3292 // If the condition constant folds and can be elided, avoid emitting the
3295 llvm::BasicBlock
*ContBlock
= nullptr;
3296 if (ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
3300 llvm::BasicBlock
*ThenBlock
= createBasicBlock("omp.precond.then");
3301 ContBlock
= createBasicBlock("omp.precond.end");
3302 emitPreCond(*this, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
3303 getProfileCount(&S
));
3304 EmitBlock(ThenBlock
);
3305 incrementProfileCounter(&S
);
3308 RunCleanupsScope
DoacrossCleanupScope(*this);
3309 bool Ordered
= false;
3310 if (const auto *OrderedClause
= S
.getSingleClause
<OMPOrderedClause
>()) {
3311 if (OrderedClause
->getNumForLoops())
3312 RT
.emitDoacrossInit(*this, S
, OrderedClause
->getLoopNumIterations());
3317 llvm::DenseSet
<const Expr
*> EmittedFinals
;
3318 emitAlignedClause(*this, S
);
3319 bool HasLinears
= EmitOMPLinearClauseInit(S
);
3320 // Emit helper vars inits.
3322 std::pair
<LValue
, LValue
> Bounds
= CodeGenLoopBounds(*this, S
);
3323 LValue LB
= Bounds
.first
;
3324 LValue UB
= Bounds
.second
;
3326 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getStrideVariable()));
3328 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()));
3330 // Emit 'then' code.
3332 OMPPrivateScope
LoopScope(*this);
3333 if (EmitOMPFirstprivateClause(S
, LoopScope
) || HasLinears
) {
3334 // Emit implicit barrier to synchronize threads and avoid data races on
3335 // initialization of firstprivate variables and post-update of
3336 // lastprivate variables.
3337 CGM
.getOpenMPRuntime().emitBarrierCall(
3338 *this, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
3339 /*ForceSimpleCall=*/true);
3341 EmitOMPPrivateClause(S
, LoopScope
);
3342 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(
3343 *this, S
, EmitLValue(S
.getIterationVariable()));
3344 HasLastprivateClause
= EmitOMPLastprivateClauseInit(S
, LoopScope
);
3345 EmitOMPReductionClauseInit(S
, LoopScope
);
3346 EmitOMPPrivateLoopCounters(S
, LoopScope
);
3347 EmitOMPLinearClause(S
, LoopScope
);
3348 (void)LoopScope
.Privatize();
3349 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
3350 CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S
);
3352 // Detect the loop schedule kind and chunk.
3353 const Expr
*ChunkExpr
= nullptr;
3354 OpenMPScheduleTy ScheduleKind
;
3355 if (const auto *C
= S
.getSingleClause
<OMPScheduleClause
>()) {
3356 ScheduleKind
.Schedule
= C
->getScheduleKind();
3357 ScheduleKind
.M1
= C
->getFirstScheduleModifier();
3358 ScheduleKind
.M2
= C
->getSecondScheduleModifier();
3359 ChunkExpr
= C
->getChunkSize();
3361 // Default behaviour for schedule clause.
3362 CGM
.getOpenMPRuntime().getDefaultScheduleAndChunk(
3363 *this, S
, ScheduleKind
.Schedule
, ChunkExpr
);
3365 bool HasChunkSizeOne
= false;
3366 llvm::Value
*Chunk
= nullptr;
3368 Chunk
= EmitScalarExpr(ChunkExpr
);
3369 Chunk
= EmitScalarConversion(Chunk
, ChunkExpr
->getType(),
3370 S
.getIterationVariable()->getType(),
3372 Expr::EvalResult Result
;
3373 if (ChunkExpr
->EvaluateAsInt(Result
, getContext())) {
3374 llvm::APSInt EvaluatedChunk
= Result
.Val
.getInt();
3375 HasChunkSizeOne
= (EvaluatedChunk
.getLimitedValue() == 1);
3378 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
3379 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
3380 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3381 // If the static schedule kind is specified or if the ordered clause is
3382 // specified, and if no monotonic modifier is specified, the effect will
3383 // be as if the monotonic modifier was specified.
3384 bool StaticChunkedOne
=
3385 RT
.isStaticChunked(ScheduleKind
.Schedule
,
3386 /* Chunked */ Chunk
!= nullptr) &&
3388 isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind());
3391 (ScheduleKind
.Schedule
== OMPC_SCHEDULE_static
&&
3392 !(ScheduleKind
.M1
== OMPC_SCHEDULE_MODIFIER_nonmonotonic
||
3393 ScheduleKind
.M2
== OMPC_SCHEDULE_MODIFIER_nonmonotonic
)) ||
3394 ScheduleKind
.M1
== OMPC_SCHEDULE_MODIFIER_monotonic
||
3395 ScheduleKind
.M2
== OMPC_SCHEDULE_MODIFIER_monotonic
;
3396 if ((RT
.isStaticNonchunked(ScheduleKind
.Schedule
,
3397 /* Chunked */ Chunk
!= nullptr) ||
3398 StaticChunkedOne
) &&
3401 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3404 [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3405 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
3406 CGF
.EmitOMPSimdInit(S
);
3407 } else if (const auto *C
= S
.getSingleClause
<OMPOrderClause
>()) {
3408 if (C
->getKind() == OMPC_ORDER_concurrent
)
3409 CGF
.LoopStack
.setParallel(/*Enable=*/true);
3412 [IVSize
, IVSigned
, Ordered
, IL
, LB
, UB
, ST
, StaticChunkedOne
, Chunk
,
3413 &S
, ScheduleKind
, LoopExit
,
3414 &LoopScope
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3415 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3416 // When no chunk_size is specified, the iteration space is divided
3417 // into chunks that are approximately equal in size, and at most
3418 // one chunk is distributed to each thread. Note that the size of
3419 // the chunks is unspecified in this case.
3420 CGOpenMPRuntime::StaticRTInput
StaticInit(
3421 IVSize
, IVSigned
, Ordered
, IL
.getAddress(CGF
),
3422 LB
.getAddress(CGF
), UB
.getAddress(CGF
), ST
.getAddress(CGF
),
3423 StaticChunkedOne
? Chunk
: nullptr);
3424 CGF
.CGM
.getOpenMPRuntime().emitForStaticInit(
3425 CGF
, S
.getBeginLoc(), S
.getDirectiveKind(), ScheduleKind
,
3427 // UB = min(UB, GlobalUB);
3428 if (!StaticChunkedOne
)
3429 CGF
.EmitIgnoredExpr(S
.getEnsureUpperBound());
3431 CGF
.EmitIgnoredExpr(S
.getInit());
3432 // For unchunked static schedule generate:
3434 // while (idx <= UB) {
3439 // For static schedule with chunk one:
3441 // while (IV <= PrevUB) {
3445 CGF
.EmitOMPInnerLoop(
3446 S
, LoopScope
.requiresCleanups(),
3447 StaticChunkedOne
? S
.getCombinedParForInDistCond()
3449 StaticChunkedOne
? S
.getDistInc() : S
.getInc(),
3450 [&S
, LoopExit
](CodeGenFunction
&CGF
) {
3451 emitOMPLoopBodyWithStopPoint(CGF
, S
, LoopExit
);
3453 [](CodeGenFunction
&) {});
3455 EmitBlock(LoopExit
.getBlock());
3456 // Tell the runtime we are done.
3457 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
) {
3458 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
3459 S
.getDirectiveKind());
3461 OMPCancelStack
.emitExit(*this, S
.getDirectiveKind(), CodeGen
);
3463 // Emit the outer loop, which requests its work chunk [LB..UB] from
3464 // runtime and runs the inner loop to process it.
3465 const OMPLoopArguments
LoopArguments(
3466 LB
.getAddress(*this), UB
.getAddress(*this), ST
.getAddress(*this),
3467 IL
.getAddress(*this), Chunk
, EUB
);
3468 EmitOMPForOuterLoop(ScheduleKind
, IsMonotonic
, S
, LoopScope
, Ordered
,
3469 LoopArguments
, CGDispatchBounds
);
3471 if (isOpenMPSimdDirective(S
.getDirectiveKind())) {
3472 EmitOMPSimdFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3473 return CGF
.Builder
.CreateIsNotNull(
3474 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3477 EmitOMPReductionClauseFinal(
3478 S
, /*ReductionKind=*/isOpenMPSimdDirective(S
.getDirectiveKind())
3479 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3480 : /*Parallel only*/ OMPD_parallel
);
3481 // Emit post-update of the reduction variables if IsLastIter != 0.
3482 emitPostUpdateForReductionClause(
3483 *this, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3484 return CGF
.Builder
.CreateIsNotNull(
3485 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3487 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3488 if (HasLastprivateClause
)
3489 EmitOMPLastprivateClauseFinal(
3490 S
, isOpenMPSimdDirective(S
.getDirectiveKind()),
3491 Builder
.CreateIsNotNull(EmitLoadOfScalar(IL
, S
.getBeginLoc())));
3492 LoopScope
.restoreMap();
3493 EmitOMPLinearClauseFinal(S
, [IL
, &S
](CodeGenFunction
&CGF
) {
3494 return CGF
.Builder
.CreateIsNotNull(
3495 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
3498 DoacrossCleanupScope
.ForceCleanup();
3499 // We're now done with the loop, so jump to the continuation block.
3501 EmitBranch(ContBlock
);
3502 EmitBlock(ContBlock
, /*IsFinished=*/true);
3505 return HasLastprivateClause
;
/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}
/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}
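// Illustrative note (not from the original source): for a statically
// scheduled loop the runtime works on the LB/UB helper variables returned by
// emitForLoopBounds, whereas for dispatch schedules emitDispatchForLoopBounds
// hands the runtime the constant range starting at 0 and ending at the
// LastIteration expression. For example, an assumed loop
//
//   #pragma omp for schedule(guided)
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// would be dispatched over the whole normalized iteration space rather than
// over precomputed per-thread bounds.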
3536 /// Emits internal temp array declarations for the directive with inscan
3538 /// The code is the following:
3540 /// size num_iters = <num_iters>;
3541 /// <type> buffer[num_iters];
3543 static void emitScanBasedDirectiveDecls(
3544 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3545 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
) {
3546 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3547 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3548 SmallVector
<const Expr
*, 4> Shareds
;
3549 SmallVector
<const Expr
*, 4> Privates
;
3550 SmallVector
<const Expr
*, 4> ReductionOps
;
3551 SmallVector
<const Expr
*, 4> CopyArrayTemps
;
3552 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3553 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3554 "Only inscan reductions are expected.");
3555 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
3556 Privates
.append(C
->privates().begin(), C
->privates().end());
3557 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
3558 CopyArrayTemps
.append(C
->copy_array_temps().begin(),
3559 C
->copy_array_temps().end());
3562 // Emit buffers for each reduction variables.
3563 // ReductionCodeGen is required to emit correctly the code for array
3565 ReductionCodeGen
RedCG(Shareds
, Shareds
, Privates
, ReductionOps
);
3567 auto *ITA
= CopyArrayTemps
.begin();
3568 for (const Expr
*IRef
: Privates
) {
3569 const auto *PrivateVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(IRef
)->getDecl());
3570 // Emit variably modified arrays, used for arrays/array sections
3572 if (PrivateVD
->getType()->isVariablyModifiedType()) {
3573 RedCG
.emitSharedOrigLValue(CGF
, Count
);
3574 RedCG
.emitAggregateType(CGF
, Count
);
3576 CodeGenFunction::OpaqueValueMapping
DimMapping(
3578 cast
<OpaqueValueExpr
>(
3579 cast
<VariableArrayType
>((*ITA
)->getType()->getAsArrayTypeUnsafe())
3581 RValue::get(OMPScanNumIterations
));
3582 // Emit temp buffer.
3583 CGF
.EmitVarDecl(*cast
<VarDecl
>(cast
<DeclRefExpr
>(*ITA
)->getDecl()));
3590 /// Copies final inscan reductions values to the original variables.
3591 /// The code is the following:
3593 /// <orig_var> = buffer[num_iters-1];
3595 static void emitScanBasedDirectiveFinals(
3596 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3597 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
) {
3598 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3599 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3600 SmallVector
<const Expr
*, 4> Shareds
;
3601 SmallVector
<const Expr
*, 4> LHSs
;
3602 SmallVector
<const Expr
*, 4> RHSs
;
3603 SmallVector
<const Expr
*, 4> Privates
;
3604 SmallVector
<const Expr
*, 4> CopyOps
;
3605 SmallVector
<const Expr
*, 4> CopyArrayElems
;
3606 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3607 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3608 "Only inscan reductions are expected.");
3609 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
3610 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
3611 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
3612 Privates
.append(C
->privates().begin(), C
->privates().end());
3613 CopyOps
.append(C
->copy_ops().begin(), C
->copy_ops().end());
3614 CopyArrayElems
.append(C
->copy_array_elems().begin(),
3615 C
->copy_array_elems().end());
3617 // Create temp var and copy LHS value to this temp value.
3618 // LHS = TMP[LastIter];
3619 llvm::Value
*OMPLast
= CGF
.Builder
.CreateNSWSub(
3620 OMPScanNumIterations
,
3621 llvm::ConstantInt::get(CGF
.SizeTy
, 1, /*isSigned=*/false));
3622 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
3623 const Expr
*PrivateExpr
= Privates
[I
];
3624 const Expr
*OrigExpr
= Shareds
[I
];
3625 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
3626 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3628 cast
<OpaqueValueExpr
>(
3629 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3630 RValue::get(OMPLast
));
3631 LValue DestLVal
= CGF
.EmitLValue(OrigExpr
);
3632 LValue SrcLVal
= CGF
.EmitLValue(CopyArrayElem
);
3633 CGF
.EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(CGF
),
3634 SrcLVal
.getAddress(CGF
),
3635 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
3636 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
3641 /// Emits the code for the directive with inscan reductions.
3642 /// The code is the following:
3645 /// for (i: 0..<num_iters>) {
3647 /// buffer[i] = red;
3649 /// #pragma omp master // in parallel region
3650 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3651 /// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3652 /// buffer[i] op= buffer[i-pow(2,k)];
3653 /// #pragma omp barrier // in parallel region
3655 /// for (0..<num_iters>) {
3656 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3660 static void emitScanBasedDirective(
3661 CodeGenFunction
&CGF
, const OMPLoopDirective
&S
,
3662 llvm::function_ref
<llvm::Value
*(CodeGenFunction
&)> NumIteratorsGen
,
3663 llvm::function_ref
<void(CodeGenFunction
&)> FirstGen
,
3664 llvm::function_ref
<void(CodeGenFunction
&)> SecondGen
) {
3665 llvm::Value
*OMPScanNumIterations
= CGF
.Builder
.CreateIntCast(
3666 NumIteratorsGen(CGF
), CGF
.SizeTy
, /*isSigned=*/false);
3667 SmallVector
<const Expr
*, 4> Privates
;
3668 SmallVector
<const Expr
*, 4> ReductionOps
;
3669 SmallVector
<const Expr
*, 4> LHSs
;
3670 SmallVector
<const Expr
*, 4> RHSs
;
3671 SmallVector
<const Expr
*, 4> CopyArrayElems
;
3672 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
3673 assert(C
->getModifier() == OMPC_REDUCTION_inscan
&&
3674 "Only inscan reductions are expected.");
3675 Privates
.append(C
->privates().begin(), C
->privates().end());
3676 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
3677 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
3678 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
3679 CopyArrayElems
.append(C
->copy_array_elems().begin(),
3680 C
->copy_array_elems().end());
3682 CodeGenFunction::ParentLoopDirectiveForScanRegion
ScanRegion(CGF
, S
);
3684 // Emit loop with input phase:
3686 // for (i: 0..<num_iters>) {
3690 CGF
.OMPFirstScanLoop
= true;
3691 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
3694 // #pragma omp barrier // in parallel region
3695 auto &&CodeGen
= [&S
, OMPScanNumIterations
, &LHSs
, &RHSs
, &CopyArrayElems
,
3697 &Privates
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
3699 // Emit prefix reduction:
3700 // #pragma omp master // in parallel region
3701 // for (int k = 0; k <= ceil(log2(n)); ++k)
3702 llvm::BasicBlock
*InputBB
= CGF
.Builder
.GetInsertBlock();
3703 llvm::BasicBlock
*LoopBB
= CGF
.createBasicBlock("omp.outer.log.scan.body");
3704 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock("omp.outer.log.scan.exit");
3706 CGF
.CGM
.getIntrinsic(llvm::Intrinsic::log2
, CGF
.DoubleTy
);
3708 CGF
.Builder
.CreateUIToFP(OMPScanNumIterations
, CGF
.DoubleTy
);
3709 llvm::Value
*LogVal
= CGF
.EmitNounwindRuntimeCall(F
, Arg
);
3710 F
= CGF
.CGM
.getIntrinsic(llvm::Intrinsic::ceil
, CGF
.DoubleTy
);
3711 LogVal
= CGF
.EmitNounwindRuntimeCall(F
, LogVal
);
3712 LogVal
= CGF
.Builder
.CreateFPToUI(LogVal
, CGF
.IntTy
);
3713 llvm::Value
*NMin1
= CGF
.Builder
.CreateNUWSub(
3714 OMPScanNumIterations
, llvm::ConstantInt::get(CGF
.SizeTy
, 1));
3715 auto DL
= ApplyDebugLocation::CreateDefaultArtificial(CGF
, S
.getBeginLoc());
3716 CGF
.EmitBlock(LoopBB
);
3717 auto *Counter
= CGF
.Builder
.CreatePHI(CGF
.IntTy
, 2);
3719 auto *Pow2K
= CGF
.Builder
.CreatePHI(CGF
.SizeTy
, 2);
3720 Counter
->addIncoming(llvm::ConstantInt::get(CGF
.IntTy
, 0), InputBB
);
3721 Pow2K
->addIncoming(llvm::ConstantInt::get(CGF
.SizeTy
, 1), InputBB
);
3722 // for (size i = n - 1; i >= 2 ^ k; --i)
3723 // tmp[i] op= tmp[i-pow2k];
3724 llvm::BasicBlock
*InnerLoopBB
=
3725 CGF
.createBasicBlock("omp.inner.log.scan.body");
3726 llvm::BasicBlock
*InnerExitBB
=
3727 CGF
.createBasicBlock("omp.inner.log.scan.exit");
3728 llvm::Value
*CmpI
= CGF
.Builder
.CreateICmpUGE(NMin1
, Pow2K
);
3729 CGF
.Builder
.CreateCondBr(CmpI
, InnerLoopBB
, InnerExitBB
);
3730 CGF
.EmitBlock(InnerLoopBB
);
3731 auto *IVal
= CGF
.Builder
.CreatePHI(CGF
.SizeTy
, 2);
3732 IVal
->addIncoming(NMin1
, LoopBB
);
3734 CodeGenFunction::OMPPrivateScope
PrivScope(CGF
);
3735 auto *ILHS
= LHSs
.begin();
3736 auto *IRHS
= RHSs
.begin();
3737 for (const Expr
*CopyArrayElem
: CopyArrayElems
) {
3738 const auto *LHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*ILHS
)->getDecl());
3739 const auto *RHSVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRHS
)->getDecl());
3740 Address LHSAddr
= Address::invalid();
3742 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3744 cast
<OpaqueValueExpr
>(
3745 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3747 LHSAddr
= CGF
.EmitLValue(CopyArrayElem
).getAddress(CGF
);
3749 PrivScope
.addPrivate(LHSVD
, LHSAddr
);
3750 Address RHSAddr
= Address::invalid();
3752 llvm::Value
*OffsetIVal
= CGF
.Builder
.CreateNUWSub(IVal
, Pow2K
);
3753 CodeGenFunction::OpaqueValueMapping
IdxMapping(
3755 cast
<OpaqueValueExpr
>(
3756 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
3757 RValue::get(OffsetIVal
));
3758 RHSAddr
= CGF
.EmitLValue(CopyArrayElem
).getAddress(CGF
);
3760 PrivScope
.addPrivate(RHSVD
, RHSAddr
);
3764 PrivScope
.Privatize();
3765 CGF
.CGM
.getOpenMPRuntime().emitReduction(
3766 CGF
, S
.getEndLoc(), Privates
, LHSs
, RHSs
, ReductionOps
,
3767 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown
});
3769 llvm::Value
*NextIVal
=
3770 CGF
.Builder
.CreateNUWSub(IVal
, llvm::ConstantInt::get(CGF
.SizeTy
, 1));
3771 IVal
->addIncoming(NextIVal
, CGF
.Builder
.GetInsertBlock());
3772 CmpI
= CGF
.Builder
.CreateICmpUGE(NextIVal
, Pow2K
);
3773 CGF
.Builder
.CreateCondBr(CmpI
, InnerLoopBB
, InnerExitBB
);
3774 CGF
.EmitBlock(InnerExitBB
);
3776 CGF
.Builder
.CreateNUWAdd(Counter
, llvm::ConstantInt::get(CGF
.IntTy
, 1));
3777 Counter
->addIncoming(Next
, CGF
.Builder
.GetInsertBlock());
3779 llvm::Value
*NextPow2K
=
3780 CGF
.Builder
.CreateShl(Pow2K
, 1, "", /*HasNUW=*/true);
3781 Pow2K
->addIncoming(NextPow2K
, CGF
.Builder
.GetInsertBlock());
3782 llvm::Value
*Cmp
= CGF
.Builder
.CreateICmpNE(Next
, LogVal
);
3783 CGF
.Builder
.CreateCondBr(Cmp
, LoopBB
, ExitBB
);
3784 auto DL1
= ApplyDebugLocation::CreateDefaultArtificial(CGF
, S
.getEndLoc());
3785 CGF
.EmitBlock(ExitBB
);
3787 if (isOpenMPParallelDirective(S
.getDirectiveKind())) {
3788 CGF
.CGM
.getOpenMPRuntime().emitMasterRegion(CGF
, CodeGen
, S
.getBeginLoc());
3789 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
3790 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
3791 /*ForceSimpleCall=*/true);
3793 RegionCodeGenTy
RCG(CodeGen
);
3797 CGF
.OMPFirstScanLoop
= false;
3801 static bool emitWorksharingDirective(CodeGenFunction
&CGF
,
3802 const OMPLoopDirective
&S
,
3804 bool HasLastprivates
;
3805 if (llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
3806 [](const OMPReductionClause
*C
) {
3807 return C
->getModifier() == OMPC_REDUCTION_inscan
;
3809 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
3810 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
3811 OMPLoopScope
LoopScope(CGF
, S
);
3812 return CGF
.EmitScalarExpr(S
.getNumIterations());
3814 const auto &&FirstGen
= [&S
, HasCancel
](CodeGenFunction
&CGF
) {
3815 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
3816 CGF
, S
.getDirectiveKind(), HasCancel
);
3817 (void)CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3819 emitDispatchForLoopBounds
);
3820 // Emit an implicit barrier at the end.
3821 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(CGF
, S
.getBeginLoc(),
3824 const auto &&SecondGen
= [&S
, HasCancel
,
3825 &HasLastprivates
](CodeGenFunction
&CGF
) {
3826 CodeGenFunction::OMPCancelStackRAII
CancelRegion(
3827 CGF
, S
.getDirectiveKind(), HasCancel
);
3828 HasLastprivates
= CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3830 emitDispatchForLoopBounds
);
3832 if (!isOpenMPParallelDirective(S
.getDirectiveKind()))
3833 emitScanBasedDirectiveDecls(CGF
, S
, NumIteratorsGen
);
3834 emitScanBasedDirective(CGF
, S
, NumIteratorsGen
, FirstGen
, SecondGen
);
3835 if (!isOpenMPParallelDirective(S
.getDirectiveKind()))
3836 emitScanBasedDirectiveFinals(CGF
, S
, NumIteratorsGen
);
3838 CodeGenFunction::OMPCancelStackRAII
CancelRegion(CGF
, S
.getDirectiveKind(),
3840 HasLastprivates
= CGF
.EmitOMPWorksharingLoop(S
, S
.getEnsureUpperBound(),
3842 emitDispatchForLoopBounds
);
3844 return HasLastprivates
;
3847 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective
&S
) {
3850 for (OMPClause
*C
: S
.clauses()) {
3851 if (isa
<OMPNowaitClause
>(C
))
3854 if (auto *SC
= dyn_cast
<OMPScheduleClause
>(C
)) {
3855 if (SC
->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown
)
3857 if (SC
->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown
)
3859 switch (SC
->getScheduleKind()) {
3860 case OMPC_SCHEDULE_auto
:
3861 case OMPC_SCHEDULE_dynamic
:
3862 case OMPC_SCHEDULE_runtime
:
3863 case OMPC_SCHEDULE_guided
:
3864 case OMPC_SCHEDULE_static
:
3866 case OMPC_SCHEDULE_unknown
:
static llvm::omp::ScheduleKind
convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
  switch (ScheduleClauseKind) {
  case OMPC_SCHEDULE_unknown:
    return llvm::omp::OMP_SCHEDULE_Default;
  case OMPC_SCHEDULE_auto:
    return llvm::omp::OMP_SCHEDULE_Auto;
  case OMPC_SCHEDULE_dynamic:
    return llvm::omp::OMP_SCHEDULE_Dynamic;
  case OMPC_SCHEDULE_guided:
    return llvm::omp::OMP_SCHEDULE_Guided;
  case OMPC_SCHEDULE_runtime:
    return llvm::omp::OMP_SCHEDULE_Runtime;
  case OMPC_SCHEDULE_static:
    return llvm::omp::OMP_SCHEDULE_Static;
  }
  llvm_unreachable("Unhandled schedule kind");
}
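// Illustrative usage sketch (not from the original source): when the
// OpenMPIRBuilder path is enabled, the schedule clause of a loop such as
//
//   #pragma omp for schedule(guided, 8)
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// is translated with convertClauseKindToSchedKind(OMPC_SCHEDULE_guided),
// yielding llvm::omp::OMP_SCHEDULE_Guided, which EmitOMPForDirective below
// passes to OMPBuilder.applyWorkshareLoop together with the emitted
// chunk-size value.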
3896 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective
&S
) {
3897 bool HasLastprivates
= false;
3898 bool UseOMPIRBuilder
=
3899 CGM
.getLangOpts().OpenMPIRBuilder
&& isSupportedByOpenMPIRBuilder(S
);
3900 auto &&CodeGen
= [this, &S
, &HasLastprivates
,
3901 UseOMPIRBuilder
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3902 // Use the OpenMPIRBuilder if enabled.
3903 if (UseOMPIRBuilder
) {
3904 bool NeedsBarrier
= !S
.getSingleClause
<OMPNowaitClause
>();
3906 llvm::omp::ScheduleKind SchedKind
= llvm::omp::OMP_SCHEDULE_Default
;
3907 llvm::Value
*ChunkSize
= nullptr;
3908 if (auto *SchedClause
= S
.getSingleClause
<OMPScheduleClause
>()) {
3910 convertClauseKindToSchedKind(SchedClause
->getScheduleKind());
3911 if (const Expr
*ChunkSizeExpr
= SchedClause
->getChunkSize())
3912 ChunkSize
= EmitScalarExpr(ChunkSizeExpr
);
3915 // Emit the associated statement and get its loop representation.
3916 const Stmt
*Inner
= S
.getRawStmt();
3917 llvm::CanonicalLoopInfo
*CLI
=
3918 EmitOMPCollapsedCanonicalLoopNest(Inner
, 1);
3920 llvm::OpenMPIRBuilder
&OMPBuilder
=
3921 CGM
.getOpenMPRuntime().getOMPBuilder();
3922 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
3923 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
3924 OMPBuilder
.applyWorkshareLoop(
3925 Builder
.getCurrentDebugLocation(), CLI
, AllocaIP
, NeedsBarrier
,
3926 SchedKind
, ChunkSize
, /*HasSimdModifier=*/false,
3927 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
3928 /*HasOrderedClause=*/false);
3932 HasLastprivates
= emitWorksharingDirective(CGF
, S
, S
.hasCancel());
3936 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
3937 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3938 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for
, CodeGen
,
3942 if (!UseOMPIRBuilder
) {
3943 // Emit an implicit barrier at the end.
3944 if (!S
.getSingleClause
<OMPNowaitClause
>() || HasLastprivates
)
3945 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_for
);
3947 // Check for outer lastprivate conditional update.
3948 checkForLastprivateConditionalUpdate(*this, S
);
3951 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective
&S
) {
3952 bool HasLastprivates
= false;
3953 auto &&CodeGen
= [&S
, &HasLastprivates
](CodeGenFunction
&CGF
,
3954 PrePostActionTy
&) {
3955 HasLastprivates
= emitWorksharingDirective(CGF
, S
, /*HasCancel=*/false);
3959 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
3960 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
3961 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd
, CodeGen
);
3964 // Emit an implicit barrier at the end.
3965 if (!S
.getSingleClause
<OMPNowaitClause
>() || HasLastprivates
)
3966 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(), OMPD_for
);
3967 // Check for outer lastprivate conditional update.
3968 checkForLastprivateConditionalUpdate(*this, S
);
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}
3980 void CodeGenFunction::EmitSections(const OMPExecutableDirective
&S
) {
3981 const Stmt
*CapturedStmt
= S
.getInnermostCapturedStmt()->getCapturedStmt();
3982 const auto *CS
= dyn_cast
<CompoundStmt
>(CapturedStmt
);
3983 bool HasLastprivates
= false;
3984 auto &&CodeGen
= [&S
, CapturedStmt
, CS
,
3985 &HasLastprivates
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
3986 const ASTContext
&C
= CGF
.getContext();
3987 QualType KmpInt32Ty
=
3988 C
.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3989 // Emit helper vars inits.
3990 LValue LB
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.lb.",
3991 CGF
.Builder
.getInt32(0));
3992 llvm::ConstantInt
*GlobalUBVal
= CS
!= nullptr
3993 ? CGF
.Builder
.getInt32(CS
->size() - 1)
3994 : CGF
.Builder
.getInt32(0);
3996 createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.ub.", GlobalUBVal
);
3997 LValue ST
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.st.",
3998 CGF
.Builder
.getInt32(1));
3999 LValue IL
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.il.",
4000 CGF
.Builder
.getInt32(0));
4002 LValue IV
= createSectionLVal(CGF
, KmpInt32Ty
, ".omp.sections.iv.");
4003 OpaqueValueExpr
IVRefExpr(S
.getBeginLoc(), KmpInt32Ty
, VK_LValue
);
4004 CodeGenFunction::OpaqueValueMapping
OpaqueIV(CGF
, &IVRefExpr
, IV
);
4005 OpaqueValueExpr
UBRefExpr(S
.getBeginLoc(), KmpInt32Ty
, VK_LValue
);
4006 CodeGenFunction::OpaqueValueMapping
OpaqueUB(CGF
, &UBRefExpr
, UB
);
4007 // Generate condition for loop.
4008 BinaryOperator
*Cond
= BinaryOperator::Create(
4009 C
, &IVRefExpr
, &UBRefExpr
, BO_LE
, C
.BoolTy
, VK_PRValue
, OK_Ordinary
,
4010 S
.getBeginLoc(), FPOptionsOverride());
4011 // Increment for loop counter.
4012 UnaryOperator
*Inc
= UnaryOperator::Create(
4013 C
, &IVRefExpr
, UO_PreInc
, KmpInt32Ty
, VK_PRValue
, OK_Ordinary
,
4014 S
.getBeginLoc(), true, FPOptionsOverride());
4015 auto &&BodyGen
= [CapturedStmt
, CS
, &S
, &IV
](CodeGenFunction
&CGF
) {
4016 // Iterate through all sections and emit a switch construct:
4019 // <SectionStmt[0]>;
4022 // case <NumSection> - 1:
4023 // <SectionStmt[<NumSection> - 1]>;
4026 // .omp.sections.exit:
4027 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock(".omp.sections.exit");
4028 llvm::SwitchInst
*SwitchStmt
=
4029 CGF
.Builder
.CreateSwitch(CGF
.EmitLoadOfScalar(IV
, S
.getBeginLoc()),
4030 ExitBB
, CS
== nullptr ? 1 : CS
->size());
4032 unsigned CaseNumber
= 0;
4033 for (const Stmt
*SubStmt
: CS
->children()) {
4034 auto CaseBB
= CGF
.createBasicBlock(".omp.sections.case");
4035 CGF
.EmitBlock(CaseBB
);
4036 SwitchStmt
->addCase(CGF
.Builder
.getInt32(CaseNumber
), CaseBB
);
4037 CGF
.EmitStmt(SubStmt
);
4038 CGF
.EmitBranch(ExitBB
);
4042 llvm::BasicBlock
*CaseBB
= CGF
.createBasicBlock(".omp.sections.case");
4043 CGF
.EmitBlock(CaseBB
);
4044 SwitchStmt
->addCase(CGF
.Builder
.getInt32(0), CaseBB
);
4045 CGF
.EmitStmt(CapturedStmt
);
4046 CGF
.EmitBranch(ExitBB
);
4048 CGF
.EmitBlock(ExitBB
, /*IsFinished=*/true);
4051 CodeGenFunction::OMPPrivateScope
LoopScope(CGF
);
4052 if (CGF
.EmitOMPFirstprivateClause(S
, LoopScope
)) {
4053 // Emit implicit barrier to synchronize threads and avoid data races on
4054 // initialization of firstprivate variables and post-update of lastprivate
4056 CGF
.CGM
.getOpenMPRuntime().emitBarrierCall(
4057 CGF
, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
4058 /*ForceSimpleCall=*/true);
4060 CGF
.EmitOMPPrivateClause(S
, LoopScope
);
4061 CGOpenMPRuntime::LastprivateConditionalRAII
LPCRegion(CGF
, S
, IV
);
4062 HasLastprivates
= CGF
.EmitOMPLastprivateClauseInit(S
, LoopScope
);
4063 CGF
.EmitOMPReductionClauseInit(S
, LoopScope
);
4064 (void)LoopScope
.Privatize();
4065 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
4066 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
4068 // Emit static non-chunked loop.
4069 OpenMPScheduleTy ScheduleKind
;
4070 ScheduleKind
.Schedule
= OMPC_SCHEDULE_static
;
4071 CGOpenMPRuntime::StaticRTInput
StaticInit(
4072 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL
.getAddress(CGF
),
4073 LB
.getAddress(CGF
), UB
.getAddress(CGF
), ST
.getAddress(CGF
));
4074 CGF
.CGM
.getOpenMPRuntime().emitForStaticInit(
4075 CGF
, S
.getBeginLoc(), S
.getDirectiveKind(), ScheduleKind
, StaticInit
);
4076 // UB = min(UB, GlobalUB);
4077 llvm::Value
*UBVal
= CGF
.EmitLoadOfScalar(UB
, S
.getBeginLoc());
4078 llvm::Value
*MinUBGlobalUB
= CGF
.Builder
.CreateSelect(
4079 CGF
.Builder
.CreateICmpSLT(UBVal
, GlobalUBVal
), UBVal
, GlobalUBVal
);
4080 CGF
.EmitStoreOfScalar(MinUBGlobalUB
, UB
);
4082 CGF
.EmitStoreOfScalar(CGF
.EmitLoadOfScalar(LB
, S
.getBeginLoc()), IV
);
4083 // while (idx <= UB) { BODY; ++idx; }
4084 CGF
.EmitOMPInnerLoop(S
, /*RequiresCleanup=*/false, Cond
, Inc
, BodyGen
,
4085 [](CodeGenFunction
&) {});
4086 // Tell the runtime we are done.
4087 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
) {
4088 CGF
.CGM
.getOpenMPRuntime().emitForStaticFinish(CGF
, S
.getEndLoc(),
4089 S
.getDirectiveKind());
4091 CGF
.OMPCancelStack
.emitExit(CGF
, S
.getDirectiveKind(), CodeGen
);
4092 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_parallel
);
4093 // Emit post-update of the reduction variables if IsLastIter != 0.
4094 emitPostUpdateForReductionClause(CGF
, S
, [IL
, &S
](CodeGenFunction
&CGF
) {
4095 return CGF
.Builder
.CreateIsNotNull(
4096 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc()));
4099 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4100 if (HasLastprivates
)
4101 CGF
.EmitOMPLastprivateClauseFinal(
4102 S
, /*NoFinals=*/false,
4103 CGF
.Builder
.CreateIsNotNull(
4104 CGF
.EmitLoadOfScalar(IL
, S
.getBeginLoc())));
4107 bool HasCancel
= false;
4108 if (auto *OSD
= dyn_cast
<OMPSectionsDirective
>(&S
))
4109 HasCancel
= OSD
->hasCancel();
4110 else if (auto *OPSD
= dyn_cast
<OMPParallelSectionsDirective
>(&S
))
4111 HasCancel
= OPSD
->hasCancel();
4112 OMPCancelStackRAII
CancelRegion(*this, S
.getDirectiveKind(), HasCancel
);
4113 CGM
.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections
, CodeGen
,
4115 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4116 // clause. Otherwise the barrier will be generated by the codegen for the
4118 if (HasLastprivates
&& S
.getSingleClause
<OMPNowaitClause
>()) {
4119 // Emit implicit barrier to synchronize threads and avoid data races on
4120 // initialization of firstprivate variables.
4121 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(),
4126 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective
&S
) {
4127 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4128 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4129 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4130 using BodyGenCallbackTy
= llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy
;
4132 auto FiniCB
= [this](InsertPointTy IP
) {
4133 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4136 const CapturedStmt
*ICS
= S
.getInnermostCapturedStmt();
4137 const Stmt
*CapturedStmt
= S
.getInnermostCapturedStmt()->getCapturedStmt();
4138 const auto *CS
= dyn_cast
<CompoundStmt
>(CapturedStmt
);
4139 llvm::SmallVector
<BodyGenCallbackTy
, 4> SectionCBVector
;
4141 for (const Stmt
*SubStmt
: CS
->children()) {
4142 auto SectionCB
= [this, SubStmt
](InsertPointTy AllocaIP
,
4143 InsertPointTy CodeGenIP
) {
4144 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4145 *this, SubStmt
, AllocaIP
, CodeGenIP
, "section");
4147 SectionCBVector
.push_back(SectionCB
);
4150 auto SectionCB
= [this, CapturedStmt
](InsertPointTy AllocaIP
,
4151 InsertPointTy CodeGenIP
) {
4152 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4153 *this, CapturedStmt
, AllocaIP
, CodeGenIP
, "section");
4155 SectionCBVector
.push_back(SectionCB
);
4158 // Privatization callback that performs appropriate action for
4159 // shared/private/firstprivate/lastprivate/copyin/... variables.
4161 // TODO: This defaults to shared right now.
4162 auto PrivCB
= [](InsertPointTy AllocaIP
, InsertPointTy CodeGenIP
,
4163 llvm::Value
&, llvm::Value
&Val
, llvm::Value
*&ReplVal
) {
4164 // The next line is appropriate only for variables (Val) with the
4165 // data-sharing attribute "shared".
4171 CGCapturedStmtInfo
CGSI(*ICS
, CR_OpenMP
);
4172 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(*this, &CGSI
);
4173 llvm::OpenMPIRBuilder::InsertPointTy
AllocaIP(
4174 AllocaInsertPt
->getParent(), AllocaInsertPt
->getIterator());
4175 Builder
.restoreIP(OMPBuilder
.createSections(
4176 Builder
, AllocaIP
, SectionCBVector
, PrivCB
, FiniCB
, S
.hasCancel(),
4177 S
.getSingleClause
<OMPNowaitClause
>()));
4182 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4183 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4186 // Emit an implicit barrier at the end.
4187 if (!S
.getSingleClause
<OMPNowaitClause
>()) {
4188 CGM
.getOpenMPRuntime().emitBarrierCall(*this, S
.getBeginLoc(),
4191 // Check for outer lastprivate conditional update.
4192 checkForLastprivateConditionalUpdate(*this, S
);
4195 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective
&S
) {
4196 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4197 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4198 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4200 const Stmt
*SectionRegionBodyStmt
= S
.getAssociatedStmt();
4201 auto FiniCB
= [this](InsertPointTy IP
) {
4202 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4205 auto BodyGenCB
= [SectionRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4206 InsertPointTy CodeGenIP
) {
4207 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4208 *this, SectionRegionBodyStmt
, AllocaIP
, CodeGenIP
, "section");
4211 LexicalScope
Scope(*this, S
.getSourceRange());
4213 Builder
.restoreIP(OMPBuilder
.createSection(Builder
, BodyGenCB
, FiniCB
));
4217 LexicalScope
Scope(*this, S
.getSourceRange());
4219 EmitStmt(S
.getAssociatedStmt());
4222 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective
&S
) {
4223 llvm::SmallVector
<const Expr
*, 8> CopyprivateVars
;
4224 llvm::SmallVector
<const Expr
*, 8> DestExprs
;
4225 llvm::SmallVector
<const Expr
*, 8> SrcExprs
;
4226 llvm::SmallVector
<const Expr
*, 8> AssignmentOps
;
4227 // Check if there are any 'copyprivate' clauses associated with this
4228 // 'single' construct.
4229 // Build a list of copyprivate variables along with helper expressions
4230 // (<source>, <destination>, <destination>=<source> expressions)
4231 for (const auto *C
: S
.getClausesOfKind
<OMPCopyprivateClause
>()) {
4232 CopyprivateVars
.append(C
->varlists().begin(), C
->varlists().end());
4233 DestExprs
.append(C
->destination_exprs().begin(),
4234 C
->destination_exprs().end());
4235 SrcExprs
.append(C
->source_exprs().begin(), C
->source_exprs().end());
4236 AssignmentOps
.append(C
->assignment_ops().begin(),
4237 C
->assignment_ops().end());
4239 // Emit code for 'single' region along with 'copyprivate' clauses
4240 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4242 OMPPrivateScope
SingleScope(CGF
);
4243 (void)CGF
.EmitOMPFirstprivateClause(S
, SingleScope
);
4244 CGF
.EmitOMPPrivateClause(S
, SingleScope
);
4245 (void)SingleScope
.Privatize();
4246 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
4250 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4251 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
4252 CGM
.getOpenMPRuntime().emitSingleRegion(*this, CodeGen
, S
.getBeginLoc(),
4253 CopyprivateVars
, DestExprs
,
4254 SrcExprs
, AssignmentOps
);
4256 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4257 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4258 if (!S
.getSingleClause
<OMPNowaitClause
>() && CopyprivateVars
.empty()) {
4259 CGM
.getOpenMPRuntime().emitBarrierCall(
4260 *this, S
.getBeginLoc(),
4261 S
.getSingleClause
<OMPNowaitClause
>() ? OMPD_unknown
: OMPD_single
);
4263 // Check for outer lastprivate conditional update.
4264 checkForLastprivateConditionalUpdate(*this, S
);
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}
4275 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective
&S
) {
4276 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4277 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4278 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4280 const Stmt
*MasterRegionBodyStmt
= S
.getAssociatedStmt();
4282 auto FiniCB
= [this](InsertPointTy IP
) {
4283 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4286 auto BodyGenCB
= [MasterRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4287 InsertPointTy CodeGenIP
) {
4288 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4289 *this, MasterRegionBodyStmt
, AllocaIP
, CodeGenIP
, "master");
4292 LexicalScope
Scope(*this, S
.getSourceRange());
4294 Builder
.restoreIP(OMPBuilder
.createMaster(Builder
, BodyGenCB
, FiniCB
));
4298 LexicalScope
Scope(*this, S
.getSourceRange());
4300 emitMaster(*this, S
);
static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  Expr *Filter = nullptr;
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
    Filter = FilterClause->getThreadID();
  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
                                              Filter);
}
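// Illustrative example (not from the original source): the filter expression
// forwarded above comes from a construct such as
//
//   #pragma omp masked filter(2)
//   { Work(); }
//
// where only the thread whose id matches the filter value executes the
// region; with no filter clause the emitted code defaults to thread 0,
// matching plain '#pragma omp master' (see the Int32Ty default in
// EmitOMPMaskedDirective below).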
4315 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective
&S
) {
4316 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4317 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4318 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4320 const Stmt
*MaskedRegionBodyStmt
= S
.getAssociatedStmt();
4321 const Expr
*Filter
= nullptr;
4322 if (const auto *FilterClause
= S
.getSingleClause
<OMPFilterClause
>())
4323 Filter
= FilterClause
->getThreadID();
4324 llvm::Value
*FilterVal
= Filter
4325 ? EmitScalarExpr(Filter
, CGM
.Int32Ty
)
4326 : llvm::ConstantInt::get(CGM
.Int32Ty
, /*V=*/0);
4328 auto FiniCB
= [this](InsertPointTy IP
) {
4329 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4332 auto BodyGenCB
= [MaskedRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4333 InsertPointTy CodeGenIP
) {
4334 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4335 *this, MaskedRegionBodyStmt
, AllocaIP
, CodeGenIP
, "masked");
4338 LexicalScope
Scope(*this, S
.getSourceRange());
4341 OMPBuilder
.createMasked(Builder
, BodyGenCB
, FiniCB
, FilterVal
));
4345 LexicalScope
Scope(*this, S
.getSourceRange());
4347 emitMasked(*this, S
);
4350 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective
&S
) {
4351 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
4352 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
4353 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
4355 const Stmt
*CriticalRegionBodyStmt
= S
.getAssociatedStmt();
4356 const Expr
*Hint
= nullptr;
4357 if (const auto *HintClause
= S
.getSingleClause
<OMPHintClause
>())
4358 Hint
= HintClause
->getHint();
4360 // TODO: This is slightly different from what's currently being done in
4361 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4362 // about typing is final.
4363 llvm::Value
*HintInst
= nullptr;
4366 Builder
.CreateIntCast(EmitScalarExpr(Hint
), CGM
.Int32Ty
, false);
4368 auto FiniCB
= [this](InsertPointTy IP
) {
4369 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
4372 auto BodyGenCB
= [CriticalRegionBodyStmt
, this](InsertPointTy AllocaIP
,
4373 InsertPointTy CodeGenIP
) {
4374 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4375 *this, CriticalRegionBodyStmt
, AllocaIP
, CodeGenIP
, "critical");
4378 LexicalScope
Scope(*this, S
.getSourceRange());
4380 Builder
.restoreIP(OMPBuilder
.createCritical(
4381 Builder
, BodyGenCB
, FiniCB
, S
.getDirectiveName().getAsString(),
4387 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4389 CGF
.EmitStmt(S
.getAssociatedStmt());
4391 const Expr
*Hint
= nullptr;
4392 if (const auto *HintClause
= S
.getSingleClause
<OMPHintClause
>())
4393 Hint
= HintClause
->getHint();
4394 LexicalScope
Scope(*this, S
.getSourceRange());
4396 CGM
.getOpenMPRuntime().emitCriticalRegion(*this,
4397 S
.getDirectiveName().getAsString(),
4398 CodeGen
, S
.getBeginLoc(), Hint
);
4401 void CodeGenFunction::EmitOMPParallelForDirective(
4402 const OMPParallelForDirective
&S
) {
4403 // Emit directive as a combined directive that consists of two implicit
4404 // directives: 'parallel' with 'for' directive.
4405 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
4407 emitOMPCopyinClause(CGF
, S
);
4408 (void)emitWorksharingDirective(CGF
, S
, S
.hasCancel());
4411 const auto &&NumIteratorsGen
= [&S
](CodeGenFunction
&CGF
) {
4412 CodeGenFunction::OMPLocalDeclMapRAII
Scope(CGF
);
4413 CGCapturedStmtInfo
CGSI(CR_OpenMP
);
4414 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGSI
);
4415 OMPLoopScope
LoopScope(CGF
, S
);
4416 return CGF
.EmitScalarExpr(S
.getNumIterations());
4418 bool IsInscan
= llvm::any_of(S
.getClausesOfKind
<OMPReductionClause
>(),
4419 [](const OMPReductionClause
*C
) {
4420 return C
->getModifier() == OMPC_REDUCTION_inscan
;
4423 emitScanBasedDirectiveDecls(*this, S
, NumIteratorsGen
);
4425 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S
);
4426 emitCommonOMPParallelDirective(*this, S
, OMPD_for
, CodeGen
,
4427 emitEmptyBoundParameters
);
4429 emitScanBasedDirectiveFinals(*this, S
, NumIteratorsGen
);
4431 // Check for outer lastprivate conditional update.
4432 checkForLastprivateConditionalUpdate(*this, S
);
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                                 [](const OMPReductionClause *C) {
                                   return C->getModifier() ==
                                          OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    {
      auto LPCRegion =
          CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
      emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                     emitEmptyBoundParameters);
    }
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPParallelMaskedDirective(
    const OMPParallelMaskedDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'masked' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMasked(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
namespace {
/// Get the list of variables declared in the context of the untied tasks.
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Need to privatize only local vars, static locals can be processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(VD);
    }
  }
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(Child);
  }

  /// Returns the list of local variables that must be privatized.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace
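// Illustrative example, not part of the original file: in
//   #pragma omp task untied
//   { int Tmp = Compute(); /* task scheduling point */ Use(Tmp); }
// the task may resume on a different thread after a scheduling point, so a
// stack local like 'Tmp' is collected by the visitor above and later given
// task-private storage instead of living in the enclosing stack frame.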
static void buildDependences(const OMPExecutableDirective &S,
                             OMPTaskDataTy &Data) {

  // First look for 'omp_all_memory' and add this first.
  bool OmpAllMemory = false;
  if (llvm::any_of(
          S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
            return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
                   C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
          })) {
    OmpAllMemory = true;
    // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
    // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
    // simplify.
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
                                      /*IteratorExpr=*/nullptr);
    // Add a nullptr Expr to simplify the codegen in emitDependData.
    DD.DepExprs.push_back(nullptr);
  }
  // Add remaining dependences skipping any 'out' or 'inout' if they are
  // overridden by 'omp_all_memory'.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OpenMPDependClauseKind Kind = C->getDependencyKind();
    if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
      continue;
    if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
      continue;
    OMPTaskDataTy::DependData &DD = Data.Dependences.emplace_back(
        C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
}
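// Illustrative example, not part of the original file: for
//   #pragma omp task depend(out : omp_all_memory) depend(inout : A) depend(in : B)
// the all-memory dependence is recorded first as a single
// OMPC_DEPEND_outallmemory entry with a null expression, the 'inout : A' item
// it subsumes is skipped, and only 'in : B' is appended as a regular
// dependence.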
4612 void CodeGenFunction::EmitOMPTaskBasedDirective(
4613 const OMPExecutableDirective
&S
, const OpenMPDirectiveKind CapturedRegion
,
4614 const RegionCodeGenTy
&BodyGen
, const TaskGenTy
&TaskGen
,
4615 OMPTaskDataTy
&Data
) {
4616 // Emit outlined function for task construct.
4617 const CapturedStmt
*CS
= S
.getCapturedStmt(CapturedRegion
);
4618 auto I
= CS
->getCapturedDecl()->param_begin();
4619 auto PartId
= std::next(I
);
4620 auto TaskT
= std::next(I
, 4);
4621 // Check if the task is final
4622 if (const auto *Clause
= S
.getSingleClause
<OMPFinalClause
>()) {
4623 // If the condition constant folds and can be elided, try to avoid emitting
4624 // the condition and the dead arm of the if/else.
4625 const Expr
*Cond
= Clause
->getCondition();
4627 if (ConstantFoldsToSimpleInteger(Cond
, CondConstant
))
4628 Data
.Final
.setInt(CondConstant
);
4630 Data
.Final
.setPointer(EvaluateExprAsBool(Cond
));
4632 // By default the task is not final.
4633 Data
.Final
.setInt(/*IntVal=*/false);
4635 // Check if the task has 'priority' clause.
4636 if (const auto *Clause
= S
.getSingleClause
<OMPPriorityClause
>()) {
4637 const Expr
*Prio
= Clause
->getPriority();
4638 Data
.Priority
.setInt(/*IntVal=*/true);
4639 Data
.Priority
.setPointer(EmitScalarConversion(
4640 EmitScalarExpr(Prio
), Prio
->getType(),
4641 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4642 Prio
->getExprLoc()));
4644 // The first function argument for tasks is a thread id, the second one is a
4645 // part id (0 for tied tasks, >=0 for untied task).
4646 llvm::DenseSet
<const VarDecl
*> EmittedAsPrivate
;
4647 // Get list of private variables.
4648 for (const auto *C
: S
.getClausesOfKind
<OMPPrivateClause
>()) {
4649 auto IRef
= C
->varlist_begin();
4650 for (const Expr
*IInit
: C
->private_copies()) {
4651 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4652 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4653 Data
.PrivateVars
.push_back(*IRef
);
4654 Data
.PrivateCopies
.push_back(IInit
);
4659 EmittedAsPrivate
.clear();
4660 // Get list of firstprivate variables.
4661 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
4662 auto IRef
= C
->varlist_begin();
4663 auto IElemInitRef
= C
->inits().begin();
4664 for (const Expr
*IInit
: C
->private_copies()) {
4665 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4666 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4667 Data
.FirstprivateVars
.push_back(*IRef
);
4668 Data
.FirstprivateCopies
.push_back(IInit
);
4669 Data
.FirstprivateInits
.push_back(*IElemInitRef
);
4675 // Get list of lastprivate variables (for taskloops).
4676 llvm::MapVector
<const VarDecl
*, const DeclRefExpr
*> LastprivateDstsOrigs
;
4677 for (const auto *C
: S
.getClausesOfKind
<OMPLastprivateClause
>()) {
4678 auto IRef
= C
->varlist_begin();
4679 auto ID
= C
->destination_exprs().begin();
4680 for (const Expr
*IInit
: C
->private_copies()) {
4681 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(*IRef
)->getDecl());
4682 if (EmittedAsPrivate
.insert(OrigVD
->getCanonicalDecl()).second
) {
4683 Data
.LastprivateVars
.push_back(*IRef
);
4684 Data
.LastprivateCopies
.push_back(IInit
);
4686 LastprivateDstsOrigs
.insert(
4687 std::make_pair(cast
<VarDecl
>(cast
<DeclRefExpr
>(*ID
)->getDecl()),
4688 cast
<DeclRefExpr
>(*IRef
)));
4693 SmallVector
<const Expr
*, 4> LHSs
;
4694 SmallVector
<const Expr
*, 4> RHSs
;
4695 for (const auto *C
: S
.getClausesOfKind
<OMPReductionClause
>()) {
4696 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
4697 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
4698 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
4699 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
4700 C
->reduction_ops().end());
4701 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
4702 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
4704 Data
.Reductions
= CGM
.getOpenMPRuntime().emitTaskReductionInit(
4705 *this, S
.getBeginLoc(), LHSs
, RHSs
, Data
);
4706 // Build list of dependences.
4707 buildDependences(S
, Data
);
4708 // Get list of local vars for untied tasks.
4710 CheckVarsEscapingUntiedTaskDeclContext Checker
;
4711 Checker
.Visit(S
.getInnermostCapturedStmt()->getCapturedStmt());
4712 Data
.PrivateLocals
.append(Checker
.getPrivateDecls().begin(),
4713 Checker
.getPrivateDecls().end());
4715 auto &&CodeGen
= [&Data
, &S
, CS
, &BodyGen
, &LastprivateDstsOrigs
,
4716 CapturedRegion
](CodeGenFunction
&CGF
,
4717 PrePostActionTy
&Action
) {
4718 llvm::MapVector
<CanonicalDeclPtr
<const VarDecl
>,
4719 std::pair
<Address
, Address
>>
4721 // Set proper addresses for generated private copies.
4722 OMPPrivateScope
Scope(CGF
);
4723 // Generate debug info for variables present in shared clause.
4724 if (auto *DI
= CGF
.getDebugInfo()) {
4725 llvm::SmallDenseMap
<const VarDecl
*, FieldDecl
*> CaptureFields
=
4726 CGF
.CapturedStmtInfo
->getCaptureFields();
4727 llvm::Value
*ContextValue
= CGF
.CapturedStmtInfo
->getContextValue();
4728 if (CaptureFields
.size() && ContextValue
) {
4729 unsigned CharWidth
= CGF
.getContext().getCharWidth();
4730 // The shared variables are packed together as members of structure.
4731 // So the address of each shared variable can be computed by adding
4732 // offset of it (within record) to the base address of record. For each
4733 // shared variable, debug intrinsic llvm.dbg.declare is generated with
4734 // appropriate expressions (DIExpression).
4736 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4737 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4739 // metadata !DIExpression(DW_OP_deref))
4740 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4742 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4743 for (auto It
= CaptureFields
.begin(); It
!= CaptureFields
.end(); ++It
) {
4744 const VarDecl
*SharedVar
= It
->first
;
4745 RecordDecl
*CaptureRecord
= It
->second
->getParent();
4746 const ASTRecordLayout
&Layout
=
4747 CGF
.getContext().getASTRecordLayout(CaptureRecord
);
4749 Layout
.getFieldOffset(It
->second
->getFieldIndex()) / CharWidth
;
4750 if (CGF
.CGM
.getCodeGenOpts().hasReducedDebugInfo())
4751 (void)DI
->EmitDeclareOfAutoVariable(SharedVar
, ContextValue
,
4752 CGF
.Builder
, false);
4753 llvm::Instruction
&Last
= CGF
.Builder
.GetInsertBlock()->back();
4754 // Get the call dbg.declare instruction we just created and update
4755 // its DIExpression to add offset to base address.
4756 if (auto DDI
= dyn_cast
<llvm::DbgVariableIntrinsic
>(&Last
)) {
4757 SmallVector
<uint64_t, 8> Ops
;
4758 // Add offset to the base address if non zero.
4760 Ops
.push_back(llvm::dwarf::DW_OP_plus_uconst
);
4761 Ops
.push_back(Offset
);
4763 Ops
.push_back(llvm::dwarf::DW_OP_deref
);
4764 auto &Ctx
= DDI
->getContext();
4765 llvm::DIExpression
*DIExpr
= llvm::DIExpression::get(Ctx
, Ops
);
4766 Last
.setOperand(2, llvm::MetadataAsValue::get(Ctx
, DIExpr
));
4771 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> FirstprivatePtrs
;
4772 if (!Data
.PrivateVars
.empty() || !Data
.FirstprivateVars
.empty() ||
4773 !Data
.LastprivateVars
.empty() || !Data
.PrivateLocals
.empty()) {
4774 enum { PrivatesParam
= 2, CopyFnParam
= 3 };
4775 llvm::Value
*CopyFn
= CGF
.Builder
.CreateLoad(
4776 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(CopyFnParam
)));
4777 llvm::Value
*PrivatesPtr
= CGF
.Builder
.CreateLoad(CGF
.GetAddrOfLocalVar(
4778 CS
->getCapturedDecl()->getParam(PrivatesParam
)));
4780 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> PrivatePtrs
;
4781 llvm::SmallVector
<llvm::Value
*, 16> CallArgs
;
4782 llvm::SmallVector
<llvm::Type
*, 4> ParamTypes
;
4783 CallArgs
.push_back(PrivatesPtr
);
4784 ParamTypes
.push_back(PrivatesPtr
->getType());
4785 for (const Expr
*E
: Data
.PrivateVars
) {
4786 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4787 Address PrivatePtr
= CGF
.CreateMemTemp(
4788 CGF
.getContext().getPointerType(E
->getType()), ".priv.ptr.addr");
4789 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4790 CallArgs
.push_back(PrivatePtr
.getPointer());
4791 ParamTypes
.push_back(PrivatePtr
.getType());
4793 for (const Expr
*E
: Data
.FirstprivateVars
) {
4794 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4795 Address PrivatePtr
=
4796 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
4797 ".firstpriv.ptr.addr");
4798 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4799 FirstprivatePtrs
.emplace_back(VD
, PrivatePtr
);
4800 CallArgs
.push_back(PrivatePtr
.getPointer());
4801 ParamTypes
.push_back(PrivatePtr
.getType());
4803 for (const Expr
*E
: Data
.LastprivateVars
) {
4804 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
4805 Address PrivatePtr
=
4806 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
4807 ".lastpriv.ptr.addr");
4808 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
4809 CallArgs
.push_back(PrivatePtr
.getPointer());
4810 ParamTypes
.push_back(PrivatePtr
.getType());
4812 for (const VarDecl
*VD
: Data
.PrivateLocals
) {
4813 QualType Ty
= VD
->getType().getNonReferenceType();
4814 if (VD
->getType()->isLValueReferenceType())
4815 Ty
= CGF
.getContext().getPointerType(Ty
);
4816 if (isAllocatableDecl(VD
))
4817 Ty
= CGF
.getContext().getPointerType(Ty
);
4818 Address PrivatePtr
= CGF
.CreateMemTemp(
4819 CGF
.getContext().getPointerType(Ty
), ".local.ptr.addr");
4820 auto Result
= UntiedLocalVars
.insert(
4821 std::make_pair(VD
, std::make_pair(PrivatePtr
, Address::invalid())));
4822 // If key exists update in place.
4823 if (Result
.second
== false)
4824 *Result
.first
= std::make_pair(
4825 VD
, std::make_pair(PrivatePtr
, Address::invalid()));
4826 CallArgs
.push_back(PrivatePtr
.getPointer());
4827 ParamTypes
.push_back(PrivatePtr
.getType());
4829 auto *CopyFnTy
= llvm::FunctionType::get(CGF
.Builder
.getVoidTy(),
4830 ParamTypes
, /*isVarArg=*/false);
4831 CopyFn
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
4832 CopyFn
, CopyFnTy
->getPointerTo());
4833 CGF
.CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(
4834 CGF
, S
.getBeginLoc(), {CopyFnTy
, CopyFn
}, CallArgs
);
4835 for (const auto &Pair
: LastprivateDstsOrigs
) {
4836 const auto *OrigVD
= cast
<VarDecl
>(Pair
.second
->getDecl());
4837 DeclRefExpr
DRE(CGF
.getContext(), const_cast<VarDecl
*>(OrigVD
),
4838 /*RefersToEnclosingVariableOrCapture=*/
4839 CGF
.CapturedStmtInfo
->lookup(OrigVD
) != nullptr,
4840 Pair
.second
->getType(), VK_LValue
,
4841 Pair
.second
->getExprLoc());
4842 Scope
.addPrivate(Pair
.first
, CGF
.EmitLValue(&DRE
).getAddress(CGF
));
4844 for (const auto &Pair
: PrivatePtrs
) {
4845 Address Replacement
= Address(
4846 CGF
.Builder
.CreateLoad(Pair
.second
),
4847 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
4848 CGF
.getContext().getDeclAlign(Pair
.first
));
4849 Scope
.addPrivate(Pair
.first
, Replacement
);
4850 if (auto *DI
= CGF
.getDebugInfo())
4851 if (CGF
.CGM
.getCodeGenOpts().hasReducedDebugInfo())
4852 (void)DI
->EmitDeclareOfAutoVariable(
4853 Pair
.first
, Pair
.second
.getPointer(), CGF
.Builder
,
4854 /*UsePointerValue*/ true);
4856 // Adjust mapping for internal locals by mapping actual memory instead of
4857 // a pointer to this memory.
4858 for (auto &Pair
: UntiedLocalVars
) {
4859 QualType VDType
= Pair
.first
->getType().getNonReferenceType();
4860 if (Pair
.first
->getType()->isLValueReferenceType())
4861 VDType
= CGF
.getContext().getPointerType(VDType
);
4862 if (isAllocatableDecl(Pair
.first
)) {
4863 llvm::Value
*Ptr
= CGF
.Builder
.CreateLoad(Pair
.second
.first
);
4864 Address
Replacement(
4866 CGF
.ConvertTypeForMem(CGF
.getContext().getPointerType(VDType
)),
4867 CGF
.getPointerAlign());
4868 Pair
.second
.first
= Replacement
;
4869 Ptr
= CGF
.Builder
.CreateLoad(Replacement
);
4870 Replacement
= Address(Ptr
, CGF
.ConvertTypeForMem(VDType
),
4871 CGF
.getContext().getDeclAlign(Pair
.first
));
4872 Pair
.second
.second
= Replacement
;
4874 llvm::Value
*Ptr
= CGF
.Builder
.CreateLoad(Pair
.second
.first
);
4875 Address
Replacement(Ptr
, CGF
.ConvertTypeForMem(VDType
),
4876 CGF
.getContext().getDeclAlign(Pair
.first
));
4877 Pair
.second
.first
= Replacement
;
4881 if (Data
.Reductions
) {
4882 OMPPrivateScope
FirstprivateScope(CGF
);
4883 for (const auto &Pair
: FirstprivatePtrs
) {
4884 Address
Replacement(
4885 CGF
.Builder
.CreateLoad(Pair
.second
),
4886 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
4887 CGF
.getContext().getDeclAlign(Pair
.first
));
4888 FirstprivateScope
.addPrivate(Pair
.first
, Replacement
);
4890 (void)FirstprivateScope
.Privatize();
4891 OMPLexicalScope
LexScope(CGF
, S
, CapturedRegion
);
4892 ReductionCodeGen
RedCG(Data
.ReductionVars
, Data
.ReductionVars
,
4893 Data
.ReductionCopies
, Data
.ReductionOps
);
4894 llvm::Value
*ReductionsPtr
= CGF
.Builder
.CreateLoad(
4895 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(9)));
4896 for (unsigned Cnt
= 0, E
= Data
.ReductionVars
.size(); Cnt
< E
; ++Cnt
) {
4897 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
4898 RedCG
.emitAggregateType(CGF
, Cnt
);
4899 // FIXME: This must removed once the runtime library is fixed.
4900 // Emit required threadprivate variables for
4901 // initializer/combiner/finalizer.
4902 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
4904 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
4905 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
4907 Address(CGF
.EmitScalarConversion(
4908 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
4909 CGF
.getContext().getPointerType(
4910 Data
.ReductionCopies
[Cnt
]->getType()),
4911 Data
.ReductionCopies
[Cnt
]->getExprLoc()),
4912 CGF
.ConvertTypeForMem(Data
.ReductionCopies
[Cnt
]->getType()),
4913 Replacement
.getAlignment());
4914 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
4915 Scope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
4918 // Privatize all private variables except for in_reduction items.
4919 (void)Scope
.Privatize();
4920 SmallVector
<const Expr
*, 4> InRedVars
;
4921 SmallVector
<const Expr
*, 4> InRedPrivs
;
4922 SmallVector
<const Expr
*, 4> InRedOps
;
4923 SmallVector
<const Expr
*, 4> TaskgroupDescriptors
;
4924 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
4925 auto IPriv
= C
->privates().begin();
4926 auto IRed
= C
->reduction_ops().begin();
4927 auto ITD
= C
->taskgroup_descriptors().begin();
4928 for (const Expr
*Ref
: C
->varlists()) {
4929 InRedVars
.emplace_back(Ref
);
4930 InRedPrivs
.emplace_back(*IPriv
);
4931 InRedOps
.emplace_back(*IRed
);
4932 TaskgroupDescriptors
.emplace_back(*ITD
);
4933 std::advance(IPriv
, 1);
4934 std::advance(IRed
, 1);
4935 std::advance(ITD
, 1);
4938 // Privatize in_reduction items here, because taskgroup descriptors must be
4939 // privatized earlier.
4940 OMPPrivateScope
InRedScope(CGF
);
4941 if (!InRedVars
.empty()) {
4942 ReductionCodeGen
RedCG(InRedVars
, InRedVars
, InRedPrivs
, InRedOps
);
4943 for (unsigned Cnt
= 0, E
= InRedVars
.size(); Cnt
< E
; ++Cnt
) {
4944 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
4945 RedCG
.emitAggregateType(CGF
, Cnt
);
4946 // The taskgroup descriptor variable is always implicit firstprivate and
4947 // privatized already during processing of the firstprivates.
4948 // FIXME: This must removed once the runtime library is fixed.
4949 // Emit required threadprivate variables for
4950 // initializer/combiner/finalizer.
4951 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
4953 llvm::Value
*ReductionsPtr
;
4954 if (const Expr
*TRExpr
= TaskgroupDescriptors
[Cnt
]) {
4955 ReductionsPtr
= CGF
.EmitLoadOfScalar(CGF
.EmitLValue(TRExpr
),
4956 TRExpr
->getExprLoc());
4958 ReductionsPtr
= llvm::ConstantPointerNull::get(CGF
.VoidPtrTy
);
4960 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
4961 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
4962 Replacement
= Address(
4963 CGF
.EmitScalarConversion(
4964 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
4965 CGF
.getContext().getPointerType(InRedPrivs
[Cnt
]->getType()),
4966 InRedPrivs
[Cnt
]->getExprLoc()),
4967 CGF
.ConvertTypeForMem(InRedPrivs
[Cnt
]->getType()),
4968 Replacement
.getAlignment());
4969 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
4970 InRedScope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
4973 (void)InRedScope
.Privatize();
4975 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII
LocalVarsScope(CGF
,
4980 llvm::Function
*OutlinedFn
= CGM
.getOpenMPRuntime().emitTaskOutlinedFunction(
4981 S
, *I
, *PartId
, *TaskT
, S
.getDirectiveKind(), CodeGen
, Data
.Tied
,
4982 Data
.NumberOfParts
);
4983 OMPLexicalScope
Scope(*this, S
, std::nullopt
,
4984 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
4985 !isOpenMPSimdDirective(S
.getDirectiveKind()));
4986 TaskGen(*this, OutlinedFn
, Data
);
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}
5019 void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5020 const OMPExecutableDirective
&S
, const RegionCodeGenTy
&BodyGen
,
5021 OMPTargetDataInfo
&InputInfo
) {
5022 // Emit outlined function for task construct.
5023 const CapturedStmt
*CS
= S
.getCapturedStmt(OMPD_task
);
5024 Address CapturedStruct
= GenerateCapturedStmtArgument(*CS
);
5025 QualType SharedsTy
= getContext().getRecordType(CS
->getCapturedRecordDecl());
5026 auto I
= CS
->getCapturedDecl()->param_begin();
5027 auto PartId
= std::next(I
);
5028 auto TaskT
= std::next(I
, 4);
5030 // The task is not final.
5031 Data
.Final
.setInt(/*IntVal=*/false);
5032 // Get list of firstprivate variables.
5033 for (const auto *C
: S
.getClausesOfKind
<OMPFirstprivateClause
>()) {
5034 auto IRef
= C
->varlist_begin();
5035 auto IElemInitRef
= C
->inits().begin();
5036 for (auto *IInit
: C
->private_copies()) {
5037 Data
.FirstprivateVars
.push_back(*IRef
);
5038 Data
.FirstprivateCopies
.push_back(IInit
);
5039 Data
.FirstprivateInits
.push_back(*IElemInitRef
);
5044 SmallVector
<const Expr
*, 4> LHSs
;
5045 SmallVector
<const Expr
*, 4> RHSs
;
5046 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
5047 Data
.ReductionVars
.append(C
->varlist_begin(), C
->varlist_end());
5048 Data
.ReductionOrigs
.append(C
->varlist_begin(), C
->varlist_end());
5049 Data
.ReductionCopies
.append(C
->privates().begin(), C
->privates().end());
5050 Data
.ReductionOps
.append(C
->reduction_ops().begin(),
5051 C
->reduction_ops().end());
5052 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5053 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5055 OMPPrivateScope
TargetScope(*this);
5056 VarDecl
*BPVD
= nullptr;
5057 VarDecl
*PVD
= nullptr;
5058 VarDecl
*SVD
= nullptr;
5059 VarDecl
*MVD
= nullptr;
5060 if (InputInfo
.NumberOfTargetItems
> 0) {
5061 auto *CD
= CapturedDecl::Create(
5062 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5063 llvm::APInt
ArrSize(/*numBits=*/32, InputInfo
.NumberOfTargetItems
);
5064 QualType BaseAndPointerAndMapperType
= getContext().getConstantArrayType(
5065 getContext().VoidPtrTy
, ArrSize
, nullptr, ArraySizeModifier::Normal
,
5066 /*IndexTypeQuals=*/0);
5067 BPVD
= createImplicitFirstprivateForType(
5068 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5069 PVD
= createImplicitFirstprivateForType(
5070 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5071 QualType SizesType
= getContext().getConstantArrayType(
5072 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5073 ArrSize
, nullptr, ArraySizeModifier::Normal
,
5074 /*IndexTypeQuals=*/0);
5075 SVD
= createImplicitFirstprivateForType(getContext(), Data
, SizesType
, CD
,
5077 TargetScope
.addPrivate(BPVD
, InputInfo
.BasePointersArray
);
5078 TargetScope
.addPrivate(PVD
, InputInfo
.PointersArray
);
5079 TargetScope
.addPrivate(SVD
, InputInfo
.SizesArray
);
5080 // If there is no user-defined mapper, the mapper array will be nullptr. In
5081 // this case, we don't need to privatize it.
5082 if (!isa_and_nonnull
<llvm::ConstantPointerNull
>(
5083 InputInfo
.MappersArray
.getPointer())) {
5084 MVD
= createImplicitFirstprivateForType(
5085 getContext(), Data
, BaseAndPointerAndMapperType
, CD
, S
.getBeginLoc());
5086 TargetScope
.addPrivate(MVD
, InputInfo
.MappersArray
);
5089 (void)TargetScope
.Privatize();
5090 buildDependences(S
, Data
);
5091 auto &&CodeGen
= [&Data
, &S
, CS
, &BodyGen
, BPVD
, PVD
, SVD
, MVD
,
5092 &InputInfo
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
5093 // Set proper addresses for generated private copies.
5094 OMPPrivateScope
Scope(CGF
);
5095 if (!Data
.FirstprivateVars
.empty()) {
5096 enum { PrivatesParam
= 2, CopyFnParam
= 3 };
5097 llvm::Value
*CopyFn
= CGF
.Builder
.CreateLoad(
5098 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(CopyFnParam
)));
5099 llvm::Value
*PrivatesPtr
= CGF
.Builder
.CreateLoad(CGF
.GetAddrOfLocalVar(
5100 CS
->getCapturedDecl()->getParam(PrivatesParam
)));
5102 llvm::SmallVector
<std::pair
<const VarDecl
*, Address
>, 16> PrivatePtrs
;
5103 llvm::SmallVector
<llvm::Value
*, 16> CallArgs
;
5104 llvm::SmallVector
<llvm::Type
*, 4> ParamTypes
;
5105 CallArgs
.push_back(PrivatesPtr
);
5106 ParamTypes
.push_back(PrivatesPtr
->getType());
5107 for (const Expr
*E
: Data
.FirstprivateVars
) {
5108 const auto *VD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(E
)->getDecl());
5109 Address PrivatePtr
=
5110 CGF
.CreateMemTemp(CGF
.getContext().getPointerType(E
->getType()),
5111 ".firstpriv.ptr.addr");
5112 PrivatePtrs
.emplace_back(VD
, PrivatePtr
);
5113 CallArgs
.push_back(PrivatePtr
.getPointer());
5114 ParamTypes
.push_back(PrivatePtr
.getType());
5116 auto *CopyFnTy
= llvm::FunctionType::get(CGF
.Builder
.getVoidTy(),
5117 ParamTypes
, /*isVarArg=*/false);
5118 CopyFn
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
5119 CopyFn
, CopyFnTy
->getPointerTo());
5120 CGF
.CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(
5121 CGF
, S
.getBeginLoc(), {CopyFnTy
, CopyFn
}, CallArgs
);
5122 for (const auto &Pair
: PrivatePtrs
) {
5123 Address
Replacement(
5124 CGF
.Builder
.CreateLoad(Pair
.second
),
5125 CGF
.ConvertTypeForMem(Pair
.first
->getType().getNonReferenceType()),
5126 CGF
.getContext().getDeclAlign(Pair
.first
));
5127 Scope
.addPrivate(Pair
.first
, Replacement
);
5130 CGF
.processInReduction(S
, Data
, CGF
, CS
, Scope
);
5131 if (InputInfo
.NumberOfTargetItems
> 0) {
5132 InputInfo
.BasePointersArray
= CGF
.Builder
.CreateConstArrayGEP(
5133 CGF
.GetAddrOfLocalVar(BPVD
), /*Index=*/0);
5134 InputInfo
.PointersArray
= CGF
.Builder
.CreateConstArrayGEP(
5135 CGF
.GetAddrOfLocalVar(PVD
), /*Index=*/0);
5136 InputInfo
.SizesArray
= CGF
.Builder
.CreateConstArrayGEP(
5137 CGF
.GetAddrOfLocalVar(SVD
), /*Index=*/0);
5138 // If MVD is nullptr, the mapper array is not privatized
5140 InputInfo
.MappersArray
= CGF
.Builder
.CreateConstArrayGEP(
5141 CGF
.GetAddrOfLocalVar(MVD
), /*Index=*/0);
5145 OMPLexicalScope
LexScope(CGF
, S
, OMPD_task
, /*EmitPreInitStmt=*/false);
5146 auto *TL
= S
.getSingleClause
<OMPThreadLimitClause
>();
5147 if (CGF
.CGM
.getLangOpts().OpenMP
>= 51 &&
5148 needsTaskBasedThreadLimit(S
.getDirectiveKind()) && TL
) {
5149 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
5150 // enclosing this target region. This will indirectly set the thread_limit
5151 // for every applicable construct within target region.
5152 CGF
.CGM
.getOpenMPRuntime().emitThreadLimitClause(
5153 CGF
, TL
->getThreadLimit(), S
.getBeginLoc());
5157 llvm::Function
*OutlinedFn
= CGM
.getOpenMPRuntime().emitTaskOutlinedFunction(
5158 S
, *I
, *PartId
, *TaskT
, S
.getDirectiveKind(), CodeGen
, /*Tied=*/true,
5159 Data
.NumberOfParts
);
5160 llvm::APInt
TrueOrFalse(32, S
.hasClausesOfKind
<OMPNowaitClause
>() ? 1 : 0);
5161 IntegerLiteral
IfCond(getContext(), TrueOrFalse
,
5162 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5164 CGM
.getOpenMPRuntime().emitTaskCall(*this, S
.getBeginLoc(), S
, OutlinedFn
,
5165 SharedsTy
, CapturedStruct
, &IfCond
, Data
);
5168 void CodeGenFunction::processInReduction(const OMPExecutableDirective
&S
,
5169 OMPTaskDataTy
&Data
,
5170 CodeGenFunction
&CGF
,
5171 const CapturedStmt
*CS
,
5172 OMPPrivateScope
&Scope
) {
5173 if (Data
.Reductions
) {
5174 OpenMPDirectiveKind CapturedRegion
= S
.getDirectiveKind();
5175 OMPLexicalScope
LexScope(CGF
, S
, CapturedRegion
);
5176 ReductionCodeGen
RedCG(Data
.ReductionVars
, Data
.ReductionVars
,
5177 Data
.ReductionCopies
, Data
.ReductionOps
);
5178 llvm::Value
*ReductionsPtr
= CGF
.Builder
.CreateLoad(
5179 CGF
.GetAddrOfLocalVar(CS
->getCapturedDecl()->getParam(4)));
5180 for (unsigned Cnt
= 0, E
= Data
.ReductionVars
.size(); Cnt
< E
; ++Cnt
) {
5181 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
5182 RedCG
.emitAggregateType(CGF
, Cnt
);
5183 // FIXME: This must removed once the runtime library is fixed.
5184 // Emit required threadprivate variables for
5185 // initializer/combiner/finalizer.
5186 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
5188 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5189 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
5191 Address(CGF
.EmitScalarConversion(
5192 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
5193 CGF
.getContext().getPointerType(
5194 Data
.ReductionCopies
[Cnt
]->getType()),
5195 Data
.ReductionCopies
[Cnt
]->getExprLoc()),
5196 CGF
.ConvertTypeForMem(Data
.ReductionCopies
[Cnt
]->getType()),
5197 Replacement
.getAlignment());
5198 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5199 Scope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5202 (void)Scope
.Privatize();
5203 SmallVector
<const Expr
*, 4> InRedVars
;
5204 SmallVector
<const Expr
*, 4> InRedPrivs
;
5205 SmallVector
<const Expr
*, 4> InRedOps
;
5206 SmallVector
<const Expr
*, 4> TaskgroupDescriptors
;
5207 for (const auto *C
: S
.getClausesOfKind
<OMPInReductionClause
>()) {
5208 auto IPriv
= C
->privates().begin();
5209 auto IRed
= C
->reduction_ops().begin();
5210 auto ITD
= C
->taskgroup_descriptors().begin();
5211 for (const Expr
*Ref
: C
->varlists()) {
5212 InRedVars
.emplace_back(Ref
);
5213 InRedPrivs
.emplace_back(*IPriv
);
5214 InRedOps
.emplace_back(*IRed
);
5215 TaskgroupDescriptors
.emplace_back(*ITD
);
5216 std::advance(IPriv
, 1);
5217 std::advance(IRed
, 1);
5218 std::advance(ITD
, 1);
5221 OMPPrivateScope
InRedScope(CGF
);
5222 if (!InRedVars
.empty()) {
5223 ReductionCodeGen
RedCG(InRedVars
, InRedVars
, InRedPrivs
, InRedOps
);
5224 for (unsigned Cnt
= 0, E
= InRedVars
.size(); Cnt
< E
; ++Cnt
) {
5225 RedCG
.emitSharedOrigLValue(CGF
, Cnt
);
5226 RedCG
.emitAggregateType(CGF
, Cnt
);
5227 // FIXME: This must removed once the runtime library is fixed.
5228 // Emit required threadprivate variables for
5229 // initializer/combiner/finalizer.
5230 CGF
.CGM
.getOpenMPRuntime().emitTaskReductionFixups(CGF
, S
.getBeginLoc(),
5232 llvm::Value
*ReductionsPtr
;
5233 if (const Expr
*TRExpr
= TaskgroupDescriptors
[Cnt
]) {
5235 CGF
.EmitLoadOfScalar(CGF
.EmitLValue(TRExpr
), TRExpr
->getExprLoc());
5237 ReductionsPtr
= llvm::ConstantPointerNull::get(CGF
.VoidPtrTy
);
5239 Address Replacement
= CGF
.CGM
.getOpenMPRuntime().getTaskReductionItem(
5240 CGF
, S
.getBeginLoc(), ReductionsPtr
, RedCG
.getSharedLValue(Cnt
));
5241 Replacement
= Address(
5242 CGF
.EmitScalarConversion(
5243 Replacement
.getPointer(), CGF
.getContext().VoidPtrTy
,
5244 CGF
.getContext().getPointerType(InRedPrivs
[Cnt
]->getType()),
5245 InRedPrivs
[Cnt
]->getExprLoc()),
5246 CGF
.ConvertTypeForMem(InRedPrivs
[Cnt
]->getType()),
5247 Replacement
.getAlignment());
5248 Replacement
= RedCG
.adjustPrivateAddress(CGF
, Cnt
, Replacement
);
5249 InRedScope
.addPrivate(RedCG
.getBaseDecl(Cnt
), Replacement
);
5252 (void)InRedScope
.Privatize();
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
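// Illustrative example, not part of the original file: for
//   #pragma omp task if(Cond) untied firstprivate(X)
//   { Work(X); }
// Data.Tied becomes false because of 'untied', the if-clause expression is
// forwarded to emitTaskCall (a false value makes the task undeferred), and
// the captured body is outlined through EmitOMPTaskBasedDirective.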
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
  const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
  Expr *ME = MC ? MC->getMessageString() : nullptr;
  const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
  bool IsFatal = false;
  if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
    IsFatal = true;
  CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
}
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  OMPTaskDataTy Data;
  // Build list of dependences
  buildDependences(S, Data);
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
}
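// Illustrative example, not part of the original file: a plain
//   #pragma omp taskwait
// reaches emitTaskwaitCall with an empty dependence list, while the newer
//   #pragma omp taskwait depend(in : X) nowait
// form fills Data.Dependences via buildDependences and sets HasNowaitClause
// before the same runtime call is emitted.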
bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
  return T.clauses().empty();
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                           AllocaInsertPt->getIterator());

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      Builder.restoreIP(CodeGenIP);
      EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };
    CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
    if (!CapturedStmtInfo)
      CapturedStmtInfo = &CapStmtInfo;
    Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
    return;
  }
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(),
                                    C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(
              CGF, S.getBeginLoc(), LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
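// Illustrative example, not part of the original file: for
//   #pragma omp taskgroup task_reduction(+ : Sum)
//   { /* tasks using in_reduction(+ : Sum) */ }
// the clause makes isSupportedByOpenMPIRBuilder return false, so the runtime
// path runs: emitTaskReductionInit builds the reduction descriptor, the
// descriptor is stored into the variable named by getReductionRef, and the
// captured body is emitted inside emitTaskgroupRegion.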
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
                                ? llvm::AtomicOrdering::NotAtomic
                                : llvm::AtomicOrdering::AcquireRelease;
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::ArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
        return std::nullopt;
      }(),
      S.getBeginLoc(), AO);
}
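// Illustrative example, not part of the original file:
//   #pragma omp flush          // no list: AcquireRelease ordering is used
//   #pragma omp flush(A, B)    // with a list: the clause variables are
//                              // forwarded and NotAtomic ordering is used
// Both forms funnel into emitFlush with the ordering chosen above.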
void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
  LValue DOLVal = EmitLValue(DO->getDepobj());
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
                                           DC->getModifier());
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
        *this, Dependencies, DC->getBeginLoc());
    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
    return;
  }
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
    return;
  }
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
    CGM.getOpenMPRuntime().emitUpdateClause(
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
    return;
  }
}
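// Illustrative example, not part of the original file:
//   omp_depend_t Obj;
//   #pragma omp depobj(Obj) depend(inout : X)  // initialize the object
//   #pragma omp depobj(Obj) update(in)         // change its dependence kind
//   #pragma omp depobj(Obj) destroy            // release it
// Each statement takes exactly one of the three branches above.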
5404 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective
&S
) {
5405 if (!OMPParentLoopDirectiveForScan
)
5407 const OMPExecutableDirective
&ParentDir
= *OMPParentLoopDirectiveForScan
;
5408 bool IsInclusive
= S
.hasClausesOfKind
<OMPInclusiveClause
>();
5409 SmallVector
<const Expr
*, 4> Shareds
;
5410 SmallVector
<const Expr
*, 4> Privates
;
5411 SmallVector
<const Expr
*, 4> LHSs
;
5412 SmallVector
<const Expr
*, 4> RHSs
;
5413 SmallVector
<const Expr
*, 4> ReductionOps
;
5414 SmallVector
<const Expr
*, 4> CopyOps
;
5415 SmallVector
<const Expr
*, 4> CopyArrayTemps
;
5416 SmallVector
<const Expr
*, 4> CopyArrayElems
;
5417 for (const auto *C
: ParentDir
.getClausesOfKind
<OMPReductionClause
>()) {
5418 if (C
->getModifier() != OMPC_REDUCTION_inscan
)
5420 Shareds
.append(C
->varlist_begin(), C
->varlist_end());
5421 Privates
.append(C
->privates().begin(), C
->privates().end());
5422 LHSs
.append(C
->lhs_exprs().begin(), C
->lhs_exprs().end());
5423 RHSs
.append(C
->rhs_exprs().begin(), C
->rhs_exprs().end());
5424 ReductionOps
.append(C
->reduction_ops().begin(), C
->reduction_ops().end());
5425 CopyOps
.append(C
->copy_ops().begin(), C
->copy_ops().end());
5426 CopyArrayTemps
.append(C
->copy_array_temps().begin(),
5427 C
->copy_array_temps().end());
5428 CopyArrayElems
.append(C
->copy_array_elems().begin(),
5429 C
->copy_array_elems().end());
5431 if (ParentDir
.getDirectiveKind() == OMPD_simd
||
5432 (getLangOpts().OpenMPSimd
&&
5433 isOpenMPSimdDirective(ParentDir
.getDirectiveKind()))) {
5434 // For simd directive and simd-based directives in simd only mode, use the
5435 // following codegen:
5437 // #pragma omp simd reduction(inscan, +: x)
5440 // #pragma omp scan inclusive(x)
5443 // is transformed to:
5454 // #pragma omp simd reduction(inscan, +: x)
5457 // #pragma omp scan exclusive(x)
5470 llvm::BasicBlock
*OMPScanReduce
= createBasicBlock("omp.inscan.reduce");
5471 EmitBranch(IsInclusive
5473 : BreakContinueStack
.back().ContinueBlock
.getBlock());
5474 EmitBlock(OMPScanDispatch
);
5476 // New scope for correct construction/destruction of temp variables for
5478 LexicalScope
Scope(*this, S
.getSourceRange());
5479 EmitBranch(IsInclusive
? OMPBeforeScanBlock
: OMPAfterScanBlock
);
5480 EmitBlock(OMPScanReduce
);
5482 // Create temp var and copy LHS value to this temp value.
5484 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5485 const Expr
*PrivateExpr
= Privates
[I
];
5486 const Expr
*TempExpr
= CopyArrayTemps
[I
];
5488 *cast
<VarDecl
>(cast
<DeclRefExpr
>(TempExpr
)->getDecl()));
5489 LValue DestLVal
= EmitLValue(TempExpr
);
5490 LValue SrcLVal
= EmitLValue(LHSs
[I
]);
5491 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5492 SrcLVal
.getAddress(*this),
5493 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5494 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5498 CGM
.getOpenMPRuntime().emitReduction(
5499 *this, ParentDir
.getEndLoc(), Privates
, LHSs
, RHSs
, ReductionOps
,
5500 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd
});
5501 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5502 const Expr
*PrivateExpr
= Privates
[I
];
5506 DestLVal
= EmitLValue(RHSs
[I
]);
5507 SrcLVal
= EmitLValue(LHSs
[I
]);
5509 const Expr
*TempExpr
= CopyArrayTemps
[I
];
5510 DestLVal
= EmitLValue(RHSs
[I
]);
5511 SrcLVal
= EmitLValue(TempExpr
);
5513 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5514 SrcLVal
.getAddress(*this),
5515 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5516 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5520 EmitBranch(IsInclusive
? OMPAfterScanBlock
: OMPBeforeScanBlock
);
5521 OMPScanExitBlock
= IsInclusive
5522 ? BreakContinueStack
.back().ContinueBlock
.getBlock()
5524 EmitBlock(OMPAfterScanBlock
);
5528 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5529 EmitBlock(OMPScanExitBlock
);
5531 if (OMPFirstScanLoop
) {
5532 // Emit buffer[i] = red; at the end of the input phase.
5533 const auto *IVExpr
= cast
<OMPLoopDirective
>(ParentDir
)
5534 .getIterationVariable()
5535 ->IgnoreParenImpCasts();
5536 LValue IdxLVal
= EmitLValue(IVExpr
);
5537 llvm::Value
*IdxVal
= EmitLoadOfScalar(IdxLVal
, IVExpr
->getExprLoc());
5538 IdxVal
= Builder
.CreateIntCast(IdxVal
, SizeTy
, /*isSigned=*/false);
5539 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5540 const Expr
*PrivateExpr
= Privates
[I
];
5541 const Expr
*OrigExpr
= Shareds
[I
];
5542 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
5543 OpaqueValueMapping
IdxMapping(
5545 cast
<OpaqueValueExpr
>(
5546 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
5547 RValue::get(IdxVal
));
5548 LValue DestLVal
= EmitLValue(CopyArrayElem
);
5549 LValue SrcLVal
= EmitLValue(OrigExpr
);
5550 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5551 SrcLVal
.getAddress(*this),
5552 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5553 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5557 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5559 EmitBlock(OMPScanExitBlock
);
5560 EmitBranch(BreakContinueStack
.back().ContinueBlock
.getBlock());
5562 EmitBlock(OMPScanDispatch
);
5563 if (!OMPFirstScanLoop
) {
5564 // Emit red = buffer[i]; at the entrance to the scan phase.
5565 const auto *IVExpr
= cast
<OMPLoopDirective
>(ParentDir
)
5566 .getIterationVariable()
5567 ->IgnoreParenImpCasts();
5568 LValue IdxLVal
= EmitLValue(IVExpr
);
5569 llvm::Value
*IdxVal
= EmitLoadOfScalar(IdxLVal
, IVExpr
->getExprLoc());
5570 IdxVal
= Builder
.CreateIntCast(IdxVal
, SizeTy
, /*isSigned=*/false);
5571 llvm::BasicBlock
*ExclusiveExitBB
= nullptr;
5573 llvm::BasicBlock
*ContBB
= createBasicBlock("omp.exclusive.dec");
5574 ExclusiveExitBB
= createBasicBlock("omp.exclusive.copy.exit");
5575 llvm::Value
*Cmp
= Builder
.CreateIsNull(IdxVal
);
5576 Builder
.CreateCondBr(Cmp
, ExclusiveExitBB
, ContBB
);
5578 // Use idx - 1 iteration for exclusive scan.
5579 IdxVal
= Builder
.CreateNUWSub(IdxVal
, llvm::ConstantInt::get(SizeTy
, 1));
5581 for (unsigned I
= 0, E
= CopyArrayElems
.size(); I
< E
; ++I
) {
5582 const Expr
*PrivateExpr
= Privates
[I
];
5583 const Expr
*OrigExpr
= Shareds
[I
];
5584 const Expr
*CopyArrayElem
= CopyArrayElems
[I
];
5585 OpaqueValueMapping
IdxMapping(
5587 cast
<OpaqueValueExpr
>(
5588 cast
<ArraySubscriptExpr
>(CopyArrayElem
)->getIdx()),
5589 RValue::get(IdxVal
));
5590 LValue SrcLVal
= EmitLValue(CopyArrayElem
);
5591 LValue DestLVal
= EmitLValue(OrigExpr
);
5592 EmitOMPCopy(PrivateExpr
->getType(), DestLVal
.getAddress(*this),
5593 SrcLVal
.getAddress(*this),
5594 cast
<VarDecl
>(cast
<DeclRefExpr
>(LHSs
[I
])->getDecl()),
5595 cast
<VarDecl
>(cast
<DeclRefExpr
>(RHSs
[I
])->getDecl()),
5599 EmitBlock(ExclusiveExitBB
);
5602 EmitBranch((OMPFirstScanLoop
== IsInclusive
) ? OMPBeforeScanBlock
5603 : OMPAfterScanBlock
);
5604 EmitBlock(OMPAfterScanBlock
);
5607 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective
&S
,
5608 const CodeGenLoopTy
&CodeGenLoop
,
5610 // Emit the loop iteration variable.
5611 const auto *IVExpr
= cast
<DeclRefExpr
>(S
.getIterationVariable());
5612 const auto *IVDecl
= cast
<VarDecl
>(IVExpr
->getDecl());
5613 EmitVarDecl(*IVDecl
);
5615 // Emit the iterations count variable.
5616 // If it is not a variable, Sema decided to calculate iterations count on each
5617 // iteration (e.g., it is foldable into a constant).
5618 if (const auto *LIExpr
= dyn_cast
<DeclRefExpr
>(S
.getLastIteration())) {
5619 EmitVarDecl(*cast
<VarDecl
>(LIExpr
->getDecl()));
5620 // Emit calculation of the iterations count.
5621 EmitIgnoredExpr(S
.getCalcLastIteration());
5624 CGOpenMPRuntime
&RT
= CGM
.getOpenMPRuntime();
5626 bool HasLastprivateClause
= false;
5627 // Check pre-condition.
5629 OMPLoopScope
PreInitScope(*this, S
);
5630 // Skip the entire loop if we don't meet the precondition.
5631 // If the condition constant folds and can be elided, avoid emitting the
5634 llvm::BasicBlock
*ContBlock
= nullptr;
5635 if (ConstantFoldsToSimpleInteger(S
.getPreCond(), CondConstant
)) {
5639 llvm::BasicBlock
*ThenBlock
= createBasicBlock("omp.precond.then");
5640 ContBlock
= createBasicBlock("omp.precond.end");
5641 emitPreCond(*this, S
, S
.getPreCond(), ThenBlock
, ContBlock
,
5642 getProfileCount(&S
));
5643 EmitBlock(ThenBlock
);
5644 incrementProfileCounter(&S
);
5647 emitAlignedClause(*this, S
);
5648 // Emit 'then' code.
5650 // Emit helper vars inits.
5652 LValue LB
= EmitOMPHelperVar(
5653 *this, cast
<DeclRefExpr
>(
5654 (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5655 ? S
.getCombinedLowerBoundVariable()
5656 : S
.getLowerBoundVariable())));
5657 LValue UB
= EmitOMPHelperVar(
5658 *this, cast
<DeclRefExpr
>(
5659 (isOpenMPLoopBoundSharingDirective(S
.getDirectiveKind())
5660 ? S
.getCombinedUpperBoundVariable()
5661 : S
.getUpperBoundVariable())));
5663 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getStrideVariable()));
5665 EmitOMPHelperVar(*this, cast
<DeclRefExpr
>(S
.getIsLastIterVariable()));
5667 OMPPrivateScope
LoopScope(*this);
5668 if (EmitOMPFirstprivateClause(S
, LoopScope
)) {
5669 // Emit implicit barrier to synchronize threads and avoid data races
5670 // on initialization of firstprivate variables and post-update of
5671 // lastprivate variables.
5672 CGM
.getOpenMPRuntime().emitBarrierCall(
5673 *this, S
.getBeginLoc(), OMPD_unknown
, /*EmitChecks=*/false,
5674 /*ForceSimpleCall=*/true);
5676 EmitOMPPrivateClause(S
, LoopScope
);
5677 if (isOpenMPSimdDirective(S
.getDirectiveKind()) &&
5678 !isOpenMPParallelDirective(S
.getDirectiveKind()) &&
5679 !isOpenMPTeamsDirective(S
.getDirectiveKind()))
5680 EmitOMPReductionClauseInit(S
, LoopScope
);
5681 HasLastprivateClause
= EmitOMPLastprivateClauseInit(S
, LoopScope
);
5682 EmitOMPPrivateLoopCounters(S
, LoopScope
);
5683 (void)LoopScope
.Privatize();
5684 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
5685 CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S
);
5687 // Detect the distribute schedule kind and chunk.
5688 llvm::Value
*Chunk
= nullptr;
5689 OpenMPDistScheduleClauseKind ScheduleKind
= OMPC_DIST_SCHEDULE_unknown
;
5690 if (const auto *C
= S
.getSingleClause
<OMPDistScheduleClause
>()) {
5691 ScheduleKind
= C
->getDistScheduleKind();
5692 if (const Expr
*Ch
= C
->getChunkSize()) {
5693 Chunk
= EmitScalarExpr(Ch
);
5694 Chunk
= EmitScalarConversion(Chunk
, Ch
->getType(),
5695 S
.getIterationVariable()->getType(),
5699 // Default behaviour for dist_schedule clause.
5700 CGM
.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5701 *this, S
, ScheduleKind
, Chunk
);
5703 const unsigned IVSize
= getContext().getTypeSize(IVExpr
->getType());
5704 const bool IVSigned
= IVExpr
->getType()->hasSignedIntegerRepresentation();
5706 // OpenMP [2.10.8, distribute Construct, Description]
5707 // If dist_schedule is specified, kind must be static. If specified,
5708 // iterations are divided into chunks of size chunk_size, chunks are
5709 // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked =
          RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunk one schedule generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
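
// Illustrative sketch (assumed example, not taken from the original source or
// tests) of user code that reaches EmitOMPDistributeDirective; the loop
// iterations are divided statically across the teams of the enclosing league:
//
//   #pragma omp teams
//   #pragma omp distribute
//   for (int i = 0; i < n; ++i)
//     a[i] = b[i] + c[i];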
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}
template <typename T>
static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
                          llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
                          llvm::OpenMPIRBuilder &OMPBuilder) {
  unsigned NumLoops = C->getNumLoops();
  QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
      /*DestWidth=*/64, /*Signed=*/1);
  llvm::SmallVector<llvm::Value *> StoreValues;
  for (unsigned I = 0; I < NumLoops; I++) {
    const Expr *CounterVal = C->getLoopData(I);
    llvm::Value *StoreValue = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    StoreValues.emplace_back(StoreValue);
  }
  OMPDoacrossKind<T> ODK;
  bool IsDependSource = ODK.isSource(C);
  CGF.Builder.restoreIP(
      OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
                                     StoreValues, ".cnt.addr", IsDependSource));
}
5880 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective
&S
) {
5881 if (CGM
.getLangOpts().OpenMPIRBuilder
) {
5882 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
5883 using InsertPointTy
= llvm::OpenMPIRBuilder::InsertPointTy
;
5885 if (S
.hasClausesOfKind
<OMPDependClause
>() ||
5886 S
.hasClausesOfKind
<OMPDoacrossClause
>()) {
5887 // The ordered directive with depend clause.
5888 assert(!S
.hasAssociatedStmt() && "No associated statement must be in "
5889 "ordered depend|doacross construct.");
5890 InsertPointTy
AllocaIP(AllocaInsertPt
->getParent(),
5891 AllocaInsertPt
->getIterator());
5892 for (const auto *DC
: S
.getClausesOfKind
<OMPDependClause
>())
5893 emitRestoreIP(*this, DC
, AllocaIP
, OMPBuilder
);
5894 for (const auto *DC
: S
.getClausesOfKind
<OMPDoacrossClause
>())
5895 emitRestoreIP(*this, DC
, AllocaIP
, OMPBuilder
);
5897 // The ordered directive with threads or simd clause, or without clause.
5898 // Without clause, it behaves as if the threads clause is specified.
5899 const auto *C
= S
.getSingleClause
<OMPSIMDClause
>();
5901 auto FiniCB
= [this](InsertPointTy IP
) {
5902 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP
);
5905 auto BodyGenCB
= [&S
, C
, this](InsertPointTy AllocaIP
,
5906 InsertPointTy CodeGenIP
) {
5907 Builder
.restoreIP(CodeGenIP
);
5909 const CapturedStmt
*CS
= S
.getInnermostCapturedStmt();
5911 llvm::BasicBlock
*FiniBB
= splitBBWithSuffix(
5912 Builder
, /*CreateBranch=*/false, ".ordered.after");
5913 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
5914 GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
5915 llvm::Function
*OutlinedFn
=
5916 emitOutlinedOrderedFunction(CGM
, CS
, S
.getBeginLoc());
5917 assert(S
.getBeginLoc().isValid() &&
5918 "Outlined function call location must be valid.");
5919 ApplyDebugLocation::CreateDefaultArtificial(*this, S
.getBeginLoc());
5920 OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP
, *FiniBB
,
5921 OutlinedFn
, CapturedVars
);
5923 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5924 *this, CS
->getCapturedStmt(), AllocaIP
, CodeGenIP
, "ordered");
5928 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5930 OMPBuilder
.createOrderedThreadsSimd(Builder
, BodyGenCB
, FiniCB
, !C
));
5935 if (S
.hasClausesOfKind
<OMPDependClause
>()) {
5936 assert(!S
.hasAssociatedStmt() &&
5937 "No associated statement must be in ordered depend construct.");
5938 for (const auto *DC
: S
.getClausesOfKind
<OMPDependClause
>())
5939 CGM
.getOpenMPRuntime().emitDoacrossOrdered(*this, DC
);
5942 if (S
.hasClausesOfKind
<OMPDoacrossClause
>()) {
5943 assert(!S
.hasAssociatedStmt() &&
5944 "No associated statement must be in ordered doacross construct.");
5945 for (const auto *DC
: S
.getClausesOfKind
<OMPDoacrossClause
>())
5946 CGM
.getOpenMPRuntime().emitDoacrossOrdered(*this, DC
);
5949 const auto *C
= S
.getSingleClause
<OMPSIMDClause
>();
5950 auto &&CodeGen
= [&S
, C
, this](CodeGenFunction
&CGF
,
5951 PrePostActionTy
&Action
) {
5952 const CapturedStmt
*CS
= S
.getInnermostCapturedStmt();
5954 llvm::SmallVector
<llvm::Value
*, 16> CapturedVars
;
5955 CGF
.GenerateOpenMPCapturedVars(*CS
, CapturedVars
);
5956 llvm::Function
*OutlinedFn
=
5957 emitOutlinedOrderedFunction(CGM
, CS
, S
.getBeginLoc());
5958 CGM
.getOpenMPRuntime().emitOutlinedFunctionCall(CGF
, S
.getBeginLoc(),
5959 OutlinedFn
, CapturedVars
);
5962 CGF
.EmitStmt(CS
->getCapturedStmt());
5965 OMPLexicalScope
Scope(*this, S
, OMPD_unknown
);
5966 CGM
.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen
, S
.getBeginLoc(), !C
);
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}
static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg())
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  else
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
}
static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (LVal.isGlobalReg())
    return CGF.EmitLoadOfLValue(LVal, Loc);
  return CGF.EmitAtomicLoad(
      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
      LVal.isVolatile());
}
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}
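
// Hypothetical example (not part of the original source) of the construct
// handled by emitOMPAtomicReadExpr; with acquire or stronger ordering the
// flush emitted after the atomic load is an acquire flush:
//
//   #pragma omp atomic read acquire
//   v = x;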
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
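
// Hypothetical example (not part of the original source) for
// emitOMPAtomicWriteExpr; with release or stronger ordering the flush emitted
// before the atomic store is a release flush:
//
//   #pragma omp atomic write release
//   x = expr;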
6102 static std::pair
<bool, RValue
> emitOMPAtomicRMW(CodeGenFunction
&CGF
, LValue X
,
6104 BinaryOperatorKind BO
,
6105 llvm::AtomicOrdering AO
,
6106 bool IsXLHSInRHSPart
) {
6107 ASTContext
&Context
= CGF
.getContext();
6108 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6109 // expression is simple and atomic is allowed for the given type for the
6111 if (BO
== BO_Comma
|| !Update
.isScalar() || !X
.isSimple() ||
6112 (!isa
<llvm::ConstantInt
>(Update
.getScalarVal()) &&
6113 (Update
.getScalarVal()->getType() !=
6114 X
.getAddress(CGF
).getElementType())) ||
6115 !Context
.getTargetInfo().hasBuiltinAtomic(
6116 Context
.getTypeSize(X
.getType()), Context
.toBits(X
.getAlignment())))
6117 return std::make_pair(false, RValue::get(nullptr));
6119 auto &&CheckAtomicSupport
= [&CGF
](llvm::Type
*T
, BinaryOperatorKind BO
) {
6120 if (T
->isIntegerTy())
6123 if (T
->isFloatingPointTy() && (BO
== BO_Add
|| BO
== BO_Sub
))
6124 return llvm::isPowerOf2_64(CGF
.CGM
.getDataLayout().getTypeStoreSize(T
));
6129 if (!CheckAtomicSupport(Update
.getScalarVal()->getType(), BO
) ||
6130 !CheckAtomicSupport(X
.getAddress(CGF
).getElementType(), BO
))
6131 return std::make_pair(false, RValue::get(nullptr));
6133 bool IsInteger
= X
.getAddress(CGF
).getElementType()->isIntegerTy();
6134 llvm::AtomicRMWInst::BinOp RMWOp
;
6137 RMWOp
= IsInteger
? llvm::AtomicRMWInst::Add
: llvm::AtomicRMWInst::FAdd
;
6140 if (!IsXLHSInRHSPart
)
6141 return std::make_pair(false, RValue::get(nullptr));
6142 RMWOp
= IsInteger
? llvm::AtomicRMWInst::Sub
: llvm::AtomicRMWInst::FSub
;
6145 RMWOp
= llvm::AtomicRMWInst::And
;
6148 RMWOp
= llvm::AtomicRMWInst::Or
;
6151 RMWOp
= llvm::AtomicRMWInst::Xor
;
6155 RMWOp
= X
.getType()->hasSignedIntegerRepresentation()
6156 ? (IsXLHSInRHSPart
? llvm::AtomicRMWInst::Min
6157 : llvm::AtomicRMWInst::Max
)
6158 : (IsXLHSInRHSPart
? llvm::AtomicRMWInst::UMin
6159 : llvm::AtomicRMWInst::UMax
);
6161 RMWOp
= IsXLHSInRHSPart
? llvm::AtomicRMWInst::FMin
6162 : llvm::AtomicRMWInst::FMax
;
6166 RMWOp
= X
.getType()->hasSignedIntegerRepresentation()
6167 ? (IsXLHSInRHSPart
? llvm::AtomicRMWInst::Max
6168 : llvm::AtomicRMWInst::Min
)
6169 : (IsXLHSInRHSPart
? llvm::AtomicRMWInst::UMax
6170 : llvm::AtomicRMWInst::UMin
);
6172 RMWOp
= IsXLHSInRHSPart
? llvm::AtomicRMWInst::FMax
6173 : llvm::AtomicRMWInst::FMin
;
6176 RMWOp
= llvm::AtomicRMWInst::Xchg
;
6185 return std::make_pair(false, RValue::get(nullptr));
6204 llvm_unreachable("Unsupported atomic update operation");
6206 llvm::Value
*UpdateVal
= Update
.getScalarVal();
6207 if (auto *IC
= dyn_cast
<llvm::ConstantInt
>(UpdateVal
)) {
6209 UpdateVal
= CGF
.Builder
.CreateIntCast(
6210 IC
, X
.getAddress(CGF
).getElementType(),
6211 X
.getType()->hasSignedIntegerRepresentation());
6213 UpdateVal
= CGF
.Builder
.CreateCast(llvm::Instruction::CastOps::UIToFP
, IC
,
6214 X
.getAddress(CGF
).getElementType());
6217 CGF
.Builder
.CreateAtomicRMW(RMWOp
, X
.getPointer(CGF
), UpdateVal
, AO
);
6218 return std::make_pair(true, RValue::get(Res
));
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //  x binop= expr; -> xrval + expr;
  //  x++, ++x -> xrval + 1;
  //  x--, --x -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr
  //  x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
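
// Assumed illustration (not from the original source): an update such as
//
//   #pragma omp atomic update
//   x += expr;
//
// is lowered through emitOMPAtomicRMW to a single 'atomicrmw' instruction when
// 'x' and 'expr' have matching scalar types the target supports atomically;
// otherwise the CommonGen callback re-evaluates the update expression inside
// an atomic compare-exchange loop via EmitAtomicUpdate.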
6245 static void emitOMPAtomicUpdateExpr(CodeGenFunction
&CGF
,
6246 llvm::AtomicOrdering AO
, const Expr
*X
,
6247 const Expr
*E
, const Expr
*UE
,
6248 bool IsXLHSInRHSPart
, SourceLocation Loc
) {
6249 assert(isa
<BinaryOperator
>(UE
->IgnoreImpCasts()) &&
6250 "Update expr in 'atomic update' must be a binary operator.");
6251 const auto *BOUE
= cast
<BinaryOperator
>(UE
->IgnoreImpCasts());
6252 // Update expressions are allowed to have the following forms:
6253 // x binop= expr; -> xrval + expr;
6254 // x++, ++x -> xrval + 1;
6255 // x--, --x -> xrval - 1;
6256 // x = x binop expr; -> xrval binop expr
6257 // x = expr Op x; - > expr binop xrval;
6258 assert(X
->isLValue() && "X of 'omp atomic update' is not lvalue");
6259 LValue XLValue
= CGF
.EmitLValue(X
);
6260 RValue ExprRValue
= CGF
.EmitAnyExpr(E
);
6261 const auto *LHS
= cast
<OpaqueValueExpr
>(BOUE
->getLHS()->IgnoreImpCasts());
6262 const auto *RHS
= cast
<OpaqueValueExpr
>(BOUE
->getRHS()->IgnoreImpCasts());
6263 const OpaqueValueExpr
*XRValExpr
= IsXLHSInRHSPart
? LHS
: RHS
;
6264 const OpaqueValueExpr
*ERValExpr
= IsXLHSInRHSPart
? RHS
: LHS
;
6265 auto &&Gen
= [&CGF
, UE
, ExprRValue
, XRValExpr
, ERValExpr
](RValue XRValue
) {
6266 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6267 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, XRValue
);
6268 return CGF
.EmitAnyExpr(UE
);
6270 (void)CGF
.EmitOMPAtomicSimpleUpdateExpr(
6271 XLValue
, ExprRValue
, BOUE
->getOpcode(), IsXLHSInRHSPart
, AO
, Loc
, Gen
);
6272 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6273 // OpenMP, 2.17.7, atomic Construct
6274 // If the write, update, or capture clause is specified and the release,
6275 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6276 // the atomic operation is also a release flush.
6278 case llvm::AtomicOrdering::Release
:
6279 case llvm::AtomicOrdering::AcquireRelease
:
6280 case llvm::AtomicOrdering::SequentiallyConsistent
:
6281 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6282 llvm::AtomicOrdering::Release
);
6284 case llvm::AtomicOrdering::Acquire
:
6285 case llvm::AtomicOrdering::Monotonic
:
6287 case llvm::AtomicOrdering::NotAtomic
:
6288 case llvm::AtomicOrdering::Unordered
:
6289 llvm_unreachable("Unexpected ordering.");
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}
6310 static void emitOMPAtomicCaptureExpr(CodeGenFunction
&CGF
,
6311 llvm::AtomicOrdering AO
,
6312 bool IsPostfixUpdate
, const Expr
*V
,
6313 const Expr
*X
, const Expr
*E
,
6314 const Expr
*UE
, bool IsXLHSInRHSPart
,
6315 SourceLocation Loc
) {
6316 assert(X
->isLValue() && "X of 'omp atomic capture' is not lvalue");
6317 assert(V
->isLValue() && "V of 'omp atomic capture' is not lvalue");
6319 LValue VLValue
= CGF
.EmitLValue(V
);
6320 LValue XLValue
= CGF
.EmitLValue(X
);
6321 RValue ExprRValue
= CGF
.EmitAnyExpr(E
);
6322 QualType NewVValType
;
6324 // 'x' is updated with some additional value.
6325 assert(isa
<BinaryOperator
>(UE
->IgnoreImpCasts()) &&
6326 "Update expr in 'atomic capture' must be a binary operator.");
6327 const auto *BOUE
= cast
<BinaryOperator
>(UE
->IgnoreImpCasts());
6328 // Update expressions are allowed to have the following forms:
6329 // x binop= expr; -> xrval + expr;
6330 // x++, ++x -> xrval + 1;
6331 // x--, --x -> xrval - 1;
6332 // x = x binop expr; -> xrval binop expr
6333 // x = expr Op x; - > expr binop xrval;
6334 const auto *LHS
= cast
<OpaqueValueExpr
>(BOUE
->getLHS()->IgnoreImpCasts());
6335 const auto *RHS
= cast
<OpaqueValueExpr
>(BOUE
->getRHS()->IgnoreImpCasts());
6336 const OpaqueValueExpr
*XRValExpr
= IsXLHSInRHSPart
? LHS
: RHS
;
6337 NewVValType
= XRValExpr
->getType();
6338 const OpaqueValueExpr
*ERValExpr
= IsXLHSInRHSPart
? RHS
: LHS
;
6339 auto &&Gen
= [&CGF
, &NewVVal
, UE
, ExprRValue
, XRValExpr
, ERValExpr
,
6340 IsPostfixUpdate
](RValue XRValue
) {
6341 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6342 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, XRValue
);
6343 RValue Res
= CGF
.EmitAnyExpr(UE
);
6344 NewVVal
= IsPostfixUpdate
? XRValue
: Res
;
6347 auto Res
= CGF
.EmitOMPAtomicSimpleUpdateExpr(
6348 XLValue
, ExprRValue
, BOUE
->getOpcode(), IsXLHSInRHSPart
, AO
, Loc
, Gen
);
6349 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6351 // 'atomicrmw' instruction was generated.
6352 if (IsPostfixUpdate
) {
6353 // Use old value from 'atomicrmw'.
6354 NewVVal
= Res
.second
;
6356 // 'atomicrmw' does not provide new value, so evaluate it using old
6358 CodeGenFunction::OpaqueValueMapping
MapExpr(CGF
, ERValExpr
, ExprRValue
);
6359 CodeGenFunction::OpaqueValueMapping
MapX(CGF
, XRValExpr
, Res
.second
);
6360 NewVVal
= CGF
.EmitAnyExpr(UE
);
6364 // 'x' is simply rewritten with some 'expr'.
6365 NewVValType
= X
->getType().getNonReferenceType();
6366 ExprRValue
= convertToType(CGF
, ExprRValue
, E
->getType(),
6367 X
->getType().getNonReferenceType(), Loc
);
6368 auto &&Gen
= [&NewVVal
, ExprRValue
](RValue XRValue
) {
6372 // Try to perform atomicrmw xchg, otherwise simple exchange.
6373 auto Res
= CGF
.EmitOMPAtomicSimpleUpdateExpr(
6374 XLValue
, ExprRValue
, /*BO=*/BO_Assign
, /*IsXLHSInRHSPart=*/false, AO
,
6376 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, X
);
6378 // 'atomicrmw' instruction was generated.
6379 NewVVal
= IsPostfixUpdate
? Res
.second
: ExprRValue
;
6382 // Emit post-update store to 'v' of old/new 'x' value.
6383 CGF
.emitOMPSimpleStore(VLValue
, NewVVal
, NewVValType
, Loc
);
6384 CGF
.CGM
.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF
, V
);
6385 // OpenMP 5.1 removes the required flush for capture clause.
6386 if (CGF
.CGM
.getLangOpts().OpenMP
< 51) {
6387 // OpenMP, 2.17.7, atomic Construct
6388 // If the write, update, or capture clause is specified and the release,
6389 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6390 // the atomic operation is also a release flush.
6391 // If the read or capture clause is specified and the acquire, acq_rel, or
6392 // seq_cst clause is specified then the strong flush on exit from the atomic
6393 // operation is also an acquire flush.
6395 case llvm::AtomicOrdering::Release
:
6396 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6397 llvm::AtomicOrdering::Release
);
6399 case llvm::AtomicOrdering::Acquire
:
6400 CGF
.CGM
.getOpenMPRuntime().emitFlush(CGF
, std::nullopt
, Loc
,
6401 llvm::AtomicOrdering::Acquire
);
6403 case llvm::AtomicOrdering::AcquireRelease
:
6404 case llvm::AtomicOrdering::SequentiallyConsistent
:
6405 CGF
.CGM
.getOpenMPRuntime().emitFlush(
6406 CGF
, std::nullopt
, Loc
, llvm::AtomicOrdering::AcquireRelease
);
6408 case llvm::AtomicOrdering::Monotonic
:
6410 case llvm::AtomicOrdering::NotAtomic
:
6411 case llvm::AtomicOrdering::Unordered
:
6412 llvm_unreachable("Unexpected ordering.");
6417 static void emitOMPAtomicCompareExpr(CodeGenFunction
&CGF
,
6418 llvm::AtomicOrdering AO
, const Expr
*X
,
6419 const Expr
*V
, const Expr
*R
,
6420 const Expr
*E
, const Expr
*D
,
6421 const Expr
*CE
, bool IsXBinopExpr
,
6422 bool IsPostfixUpdate
, bool IsFailOnly
,
6423 SourceLocation Loc
) {
6424 llvm::OpenMPIRBuilder
&OMPBuilder
=
6425 CGF
.CGM
.getOpenMPRuntime().getOMPBuilder();
6427 OMPAtomicCompareOp Op
;
6428 assert(isa
<BinaryOperator
>(CE
) && "CE is not a BinaryOperator");
6429 switch (cast
<BinaryOperator
>(CE
)->getOpcode()) {
6431 Op
= OMPAtomicCompareOp::EQ
;
6434 Op
= OMPAtomicCompareOp::MIN
;
6437 Op
= OMPAtomicCompareOp::MAX
;
6440 llvm_unreachable("unsupported atomic compare binary operator");
6443 LValue XLVal
= CGF
.EmitLValue(X
);
6444 Address XAddr
= XLVal
.getAddress(CGF
);
6446 auto EmitRValueWithCastIfNeeded
= [&CGF
, Loc
](const Expr
*X
, const Expr
*E
) {
6447 if (X
->getType() == E
->getType())
6448 return CGF
.EmitScalarExpr(E
);
6449 const Expr
*NewE
= E
->IgnoreImplicitAsWritten();
6450 llvm::Value
*V
= CGF
.EmitScalarExpr(NewE
);
6451 if (NewE
->getType() == X
->getType())
6453 return CGF
.EmitScalarConversion(V
, NewE
->getType(), X
->getType(), Loc
);
6456 llvm::Value
*EVal
= EmitRValueWithCastIfNeeded(X
, E
);
6457 llvm::Value
*DVal
= D
? EmitRValueWithCastIfNeeded(X
, D
) : nullptr;
6458 if (auto *CI
= dyn_cast
<llvm::ConstantInt
>(EVal
))
6459 EVal
= CGF
.Builder
.CreateIntCast(
6460 CI
, XLVal
.getAddress(CGF
).getElementType(),
6461 E
->getType()->hasSignedIntegerRepresentation());
6463 if (auto *CI
= dyn_cast
<llvm::ConstantInt
>(DVal
))
6464 DVal
= CGF
.Builder
.CreateIntCast(
6465 CI
, XLVal
.getAddress(CGF
).getElementType(),
6466 D
->getType()->hasSignedIntegerRepresentation());
6468 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal
{
6469 XAddr
.getPointer(), XAddr
.getElementType(),
6470 X
->getType()->hasSignedIntegerRepresentation(),
6471 X
->getType().isVolatileQualified()};
6472 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal
, ROpVal
;
6474 LValue LV
= CGF
.EmitLValue(V
);
6475 Address Addr
= LV
.getAddress(CGF
);
6476 VOpVal
= {Addr
.getPointer(), Addr
.getElementType(),
6477 V
->getType()->hasSignedIntegerRepresentation(),
6478 V
->getType().isVolatileQualified()};
6481 LValue LV
= CGF
.EmitLValue(R
);
6482 Address Addr
= LV
.getAddress(CGF
);
6483 ROpVal
= {Addr
.getPointer(), Addr
.getElementType(),
6484 R
->getType()->hasSignedIntegerRepresentation(),
6485 R
->getType().isVolatileQualified()};
6488 CGF
.Builder
.restoreIP(OMPBuilder
.createAtomicCompare(
6489 CGF
.Builder
, XOpVal
, VOpVal
, ROpVal
, EVal
, DVal
, AO
, Op
, IsXBinopExpr
,
6490 IsPostfixUpdate
, IsFailOnly
));
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *R,
                              const Expr *E, const Expr *UE, const Expr *D,
                              const Expr *CE, bool IsXLHSInRHSPart,
                              bool IsFailOnly, SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_compare: {
    emitOMPAtomicCompareExpr(CGF, AO, X, V, R, E, D, CE, IsXLHSInRHSPart,
                             IsPostfixUpdate, IsFailOnly, Loc);
    break;
  }
  default:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
    // if it is first).
    OpenMPClauseKind K = C->getClauseKind();
    if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
        K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
      continue;
    Kind = K;
    KindsEncountered.insert(K);
  }
  // We just need to correct Kind here. No need to set a bool saying it is
  // actually compare capture because we can tell from whether V and R are
  // nullptr.
  if (KindsEncountered.contains(OMPC_compare) &&
      KindsEncountered.contains(OMPC_capture))
    Kind = OMPC_compare;
  if (!MemOrderingSpecified) {
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
  emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                    S.getR(), S.getExpr(), S.getUpdateExpr(), S.getD(),
                    S.getCondExpr(), S.isXLHSInRHSPart(), S.isFailOnly(),
                    S.getBeginLoc());
}
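
// Assumed example (not from the original source) of how the default memory
// ordering is resolved above: with a 'requires
// atomic_default_mem_order(acq_rel)' declaration in effect and no ordering
// clause on the directive,
//
//   #pragma omp atomic capture
//   { v = x; x += 1; }
//
// takes AO from getDefaultMemoryOrdering(), i.e. acquire-release for the
// capture form, while a plain update under the same default would get release.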
6587 static void emitCommonOMPTargetDirective(CodeGenFunction
&CGF
,
6588 const OMPExecutableDirective
&S
,
6589 const RegionCodeGenTy
&CodeGen
) {
6590 assert(isOpenMPTargetExecutionDirective(S
.getDirectiveKind()));
6591 CodeGenModule
&CGM
= CGF
.CGM
;
6593 // On device emit this construct as inlined code.
6594 if (CGM
.getLangOpts().OpenMPIsTargetDevice
) {
6595 OMPLexicalScope
Scope(CGF
, S
, OMPD_target
);
6596 CGM
.getOpenMPRuntime().emitInlinedDirective(
6597 CGF
, OMPD_target
, [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6598 CGF
.EmitStmt(S
.getInnermostCapturedStmt()->getCapturedStmt());
6603 auto LPCRegion
= CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF
, S
);
6604 llvm::Function
*Fn
= nullptr;
6605 llvm::Constant
*FnID
= nullptr;
6607 const Expr
*IfCond
= nullptr;
6608 // Check for the at most one if clause associated with the target region.
6609 for (const auto *C
: S
.getClausesOfKind
<OMPIfClause
>()) {
6610 if (C
->getNameModifier() == OMPD_unknown
||
6611 C
->getNameModifier() == OMPD_target
) {
6612 IfCond
= C
->getCondition();
6617 // Check if we have any device clause associated with the directive.
6618 llvm::PointerIntPair
<const Expr
*, 2, OpenMPDeviceClauseModifier
> Device(
6619 nullptr, OMPC_DEVICE_unknown
);
6620 if (auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
6621 Device
.setPointerAndInt(C
->getDevice(), C
->getModifier());
6623 // Check if we have an if clause whose conditional always evaluates to false
6624 // or if we do not have any targets specified. If so the target region is not
6625 // an offload entry point.
6626 bool IsOffloadEntry
= true;
6629 if (CGF
.ConstantFoldsToSimpleInteger(IfCond
, Val
) && !Val
)
6630 IsOffloadEntry
= false;
6632 if (CGM
.getLangOpts().OMPTargetTriples
.empty())
6633 IsOffloadEntry
= false;
6635 if (CGM
.getLangOpts().OpenMPOffloadMandatory
&& !IsOffloadEntry
) {
6636 unsigned DiagID
= CGM
.getDiags().getCustomDiagID(
6637 DiagnosticsEngine::Error
,
6638 "No offloading entry generated while offloading is mandatory.");
6639 CGM
.getDiags().Report(DiagID
);
6642 assert(CGF
.CurFuncDecl
&& "No parent declaration for target region!");
6643 StringRef ParentName
;
6644 // In case we have Ctors/Dtors we use the complete type variant to produce
6645 // the mangling of the device outlined kernel.
6646 if (const auto *D
= dyn_cast
<CXXConstructorDecl
>(CGF
.CurFuncDecl
))
6647 ParentName
= CGM
.getMangledName(GlobalDecl(D
, Ctor_Complete
));
6648 else if (const auto *D
= dyn_cast
<CXXDestructorDecl
>(CGF
.CurFuncDecl
))
6649 ParentName
= CGM
.getMangledName(GlobalDecl(D
, Dtor_Complete
));
6652 CGM
.getMangledName(GlobalDecl(cast
<FunctionDecl
>(CGF
.CurFuncDecl
)));
6654 // Emit target region as a standalone region.
6655 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(S
, ParentName
, Fn
, FnID
,
6656 IsOffloadEntry
, CodeGen
);
6657 OMPLexicalScope
Scope(CGF
, S
, OMPD_task
);
6658 auto &&SizeEmitter
=
6659 [IsOffloadEntry
](CodeGenFunction
&CGF
,
6660 const OMPLoopDirective
&D
) -> llvm::Value
* {
6661 if (IsOffloadEntry
) {
6662 OMPLoopScope(CGF
, D
);
6663 // Emit calculation of the iterations count.
6664 llvm::Value
*NumIterations
= CGF
.EmitScalarExpr(D
.getNumIterations());
6665 NumIterations
= CGF
.Builder
.CreateIntCast(NumIterations
, CGF
.Int64Ty
,
6666 /*isSigned=*/false);
6667 return NumIterations
;
6671 CGM
.getOpenMPRuntime().emitTargetCall(CGF
, S
, Fn
, FnID
, IfCond
, Device
,
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
  CGF.EnsureInsertPoint();
}
void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
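
// Illustrative sketch (assumed, not from the original source) of code handled
// by EmitOMPTargetDirective; the region body is outlined and, when offloading
// targets are configured, registered as an offload entry point:
//
//   #pragma omp target map(tofrom: a[0:n])
//   for (int i = 0; i < n; ++i)
//     a[i] *= 2;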
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;

    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
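
// Assumed example (not from the original source) for EmitOMPTeamsDirective;
// firstprivate/private/reduction clauses are privatized before the captured
// statement is emitted and the teams runtime call is made:
//
//   #pragma omp teams num_teams(4) reduction(+ : sum)
//   sum += work();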
6754 static void emitTargetTeamsRegion(CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6755 const OMPTargetTeamsDirective
&S
) {
6756 auto *CS
= S
.getCapturedStmt(OMPD_teams
);
6758 // Emit teams region as a standalone region.
6759 auto &&CodeGen
= [&S
, CS
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6761 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6762 (void)CGF
.EmitOMPFirstprivateClause(S
, PrivateScope
);
6763 CGF
.EmitOMPPrivateClause(S
, PrivateScope
);
6764 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6765 (void)PrivateScope
.Privatize();
6766 if (isOpenMPTargetExecutionDirective(S
.getDirectiveKind()))
6767 CGF
.CGM
.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF
, S
);
6768 CGF
.EmitStmt(CS
->getCapturedStmt());
6769 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6771 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_teams
, CodeGen
);
6772 emitPostUpdateForReductionClause(CGF
, S
,
6773 [](CodeGenFunction
&) { return nullptr; });
6776 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6777 CodeGenModule
&CGM
, StringRef ParentName
,
6778 const OMPTargetTeamsDirective
&S
) {
6779 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6780 emitTargetTeamsRegion(CGF
, Action
, S
);
6783 llvm::Constant
*Addr
;
6784 // Emit target region as a standalone region.
6785 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6786 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6787 assert(Fn
&& Addr
&& "Target device function emission failed.");
6790 void CodeGenFunction::EmitOMPTargetTeamsDirective(
6791 const OMPTargetTeamsDirective
&S
) {
6792 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6793 emitTargetTeamsRegion(CGF
, Action
, S
);
6795 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
6799 emitTargetTeamsDistributeRegion(CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6800 const OMPTargetTeamsDistributeDirective
&S
) {
6802 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6803 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6806 // Emit teams region as a standalone region.
6807 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6808 PrePostActionTy
&Action
) {
6810 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6811 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6812 (void)PrivateScope
.Privatize();
6813 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6815 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6817 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute
, CodeGen
);
6818 emitPostUpdateForReductionClause(CGF
, S
,
6819 [](CodeGenFunction
&) { return nullptr; });
6822 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6823 CodeGenModule
&CGM
, StringRef ParentName
,
6824 const OMPTargetTeamsDistributeDirective
&S
) {
6825 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6826 emitTargetTeamsDistributeRegion(CGF
, Action
, S
);
6829 llvm::Constant
*Addr
;
6830 // Emit target region as a standalone region.
6831 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6832 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6833 assert(Fn
&& Addr
&& "Target device function emission failed.");
6836 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6837 const OMPTargetTeamsDistributeDirective
&S
) {
6838 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6839 emitTargetTeamsDistributeRegion(CGF
, Action
, S
);
6841 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
6844 static void emitTargetTeamsDistributeSimdRegion(
6845 CodeGenFunction
&CGF
, PrePostActionTy
&Action
,
6846 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6848 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6849 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6852 // Emit teams region as a standalone region.
6853 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6854 PrePostActionTy
&Action
) {
6856 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
6857 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6858 (void)PrivateScope
.Privatize();
6859 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6861 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6863 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_simd
, CodeGen
);
6864 emitPostUpdateForReductionClause(CGF
, S
,
6865 [](CodeGenFunction
&) { return nullptr; });
6868 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6869 CodeGenModule
&CGM
, StringRef ParentName
,
6870 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6871 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6872 emitTargetTeamsDistributeSimdRegion(CGF
, Action
, S
);
6875 llvm::Constant
*Addr
;
6876 // Emit target region as a standalone region.
6877 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
6878 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
6879 assert(Fn
&& Addr
&& "Target device function emission failed.");
6882 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6883 const OMPTargetTeamsDistributeSimdDirective
&S
) {
6884 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
6885 emitTargetTeamsDistributeSimdRegion(CGF
, Action
, S
);
6887 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
6890 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6891 const OMPTeamsDistributeDirective
&S
) {
6893 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6894 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6897 // Emit teams region as a standalone region.
6898 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6899 PrePostActionTy
&Action
) {
6901 OMPPrivateScope
PrivateScope(CGF
);
6902 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6903 (void)PrivateScope
.Privatize();
6904 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6906 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6908 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute
, CodeGen
);
6909 emitPostUpdateForReductionClause(*this, S
,
6910 [](CodeGenFunction
&) { return nullptr; });
6913 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6914 const OMPTeamsDistributeSimdDirective
&S
) {
6915 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6916 CGF
.EmitOMPDistributeLoop(S
, emitOMPLoopBodyWithStopPoint
, S
.getInc());
6919 // Emit teams region as a standalone region.
6920 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6921 PrePostActionTy
&Action
) {
6923 OMPPrivateScope
PrivateScope(CGF
);
6924 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6925 (void)PrivateScope
.Privatize();
6926 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_simd
,
6928 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6930 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_simd
, CodeGen
);
6931 emitPostUpdateForReductionClause(*this, S
,
6932 [](CodeGenFunction
&) { return nullptr; });
6935 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6936 const OMPTeamsDistributeParallelForDirective
&S
) {
6937 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6938 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
6942 // Emit teams region as a standalone region.
6943 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6944 PrePostActionTy
&Action
) {
6946 OMPPrivateScope
PrivateScope(CGF
);
6947 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6948 (void)PrivateScope
.Privatize();
6949 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(CGF
, OMPD_distribute
,
6951 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6953 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_parallel_for
, CodeGen
);
6954 emitPostUpdateForReductionClause(*this, S
,
6955 [](CodeGenFunction
&) { return nullptr; });
6958 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
6959 const OMPTeamsDistributeParallelForSimdDirective
&S
) {
6960 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
6961 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
6965 // Emit teams region as a standalone region.
6966 auto &&CodeGen
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
6967 PrePostActionTy
&Action
) {
6969 OMPPrivateScope
PrivateScope(CGF
);
6970 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
6971 (void)PrivateScope
.Privatize();
6972 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
6973 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
6974 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
6976 emitCommonOMPTeamsDirective(*this, S
, OMPD_distribute_parallel_for_simd
,
6978 emitPostUpdateForReductionClause(*this, S
,
6979 [](CodeGenFunction
&) { return nullptr; });
6982 void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective
&S
) {
6983 llvm::OpenMPIRBuilder
&OMPBuilder
= CGM
.getOpenMPRuntime().getOMPBuilder();
6984 llvm::Value
*Device
= nullptr;
6985 llvm::Value
*NumDependences
= nullptr;
6986 llvm::Value
*DependenceList
= nullptr;
6988 if (const auto *C
= S
.getSingleClause
<OMPDeviceClause
>())
6989 Device
= EmitScalarExpr(C
->getDevice());
6991 // Build list and emit dependences
6993 buildDependences(S
, Data
);
6994 if (!Data
.Dependences
.empty()) {
6995 Address DependenciesArray
= Address::invalid();
6996 std::tie(NumDependences
, DependenciesArray
) =
6997 CGM
.getOpenMPRuntime().emitDependClause(*this, Data
.Dependences
,
6999 DependenceList
= DependenciesArray
.getPointer();
7001 Data
.HasNowaitClause
= S
.hasClausesOfKind
<OMPNowaitClause
>();
7003 assert(!(Data
.HasNowaitClause
&& !(S
.getSingleClause
<OMPInitClause
>() ||
7004 S
.getSingleClause
<OMPDestroyClause
>() ||
7005 S
.getSingleClause
<OMPUseClause
>())) &&
7006 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
7008 if (const auto *C
= S
.getSingleClause
<OMPInitClause
>()) {
7009 llvm::Value
*InteropvarPtr
=
7010 EmitLValue(C
->getInteropVar()).getPointer(*this);
7011 llvm::omp::OMPInteropType InteropType
= llvm::omp::OMPInteropType::Unknown
;
7012 if (C
->getIsTarget()) {
7013 InteropType
= llvm::omp::OMPInteropType::Target
;
7015 assert(C
->getIsTargetSync() && "Expected interop-type target/targetsync");
7016 InteropType
= llvm::omp::OMPInteropType::TargetSync
;
7018 OMPBuilder
.createOMPInteropInit(Builder
, InteropvarPtr
, InteropType
, Device
,
7019 NumDependences
, DependenceList
,
7020 Data
.HasNowaitClause
);
7021 } else if (const auto *C
= S
.getSingleClause
<OMPDestroyClause
>()) {
7022 llvm::Value
*InteropvarPtr
=
7023 EmitLValue(C
->getInteropVar()).getPointer(*this);
7024 OMPBuilder
.createOMPInteropDestroy(Builder
, InteropvarPtr
, Device
,
7025 NumDependences
, DependenceList
,
7026 Data
.HasNowaitClause
);
7027 } else if (const auto *C
= S
.getSingleClause
<OMPUseClause
>()) {
7028 llvm::Value
*InteropvarPtr
=
7029 EmitLValue(C
->getInteropVar()).getPointer(*this);
7030 OMPBuilder
.createOMPInteropUse(Builder
, InteropvarPtr
, Device
,
7031 NumDependences
, DependenceList
,
7032 Data
.HasNowaitClause
);
7036 static void emitTargetTeamsDistributeParallelForRegion(
7037 CodeGenFunction
&CGF
, const OMPTargetTeamsDistributeParallelForDirective
&S
,
7038 PrePostActionTy
&Action
) {
7040 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7041 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
7045 // Emit teams region as a standalone region.
7046 auto &&CodeGenTeams
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
7047 PrePostActionTy
&Action
) {
7049 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
7050 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
7051 (void)PrivateScope
.Privatize();
7052 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
7053 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
7054 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
7057 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_parallel_for
,
7059 emitPostUpdateForReductionClause(CGF
, S
,
7060 [](CodeGenFunction
&) { return nullptr; });
7063 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7064 CodeGenModule
&CGM
, StringRef ParentName
,
7065 const OMPTargetTeamsDistributeParallelForDirective
&S
) {
7066 // Emit SPMD target teams distribute parallel for region as a standalone
7068 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7069 emitTargetTeamsDistributeParallelForRegion(CGF
, S
, Action
);
7072 llvm::Constant
*Addr
;
7073 // Emit target region as a standalone region.
7074 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7075 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7076 assert(Fn
&& Addr
&& "Target device function emission failed.");
7079 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7080 const OMPTargetTeamsDistributeParallelForDirective
&S
) {
7081 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7082 emitTargetTeamsDistributeParallelForRegion(CGF
, S
, Action
);
7084 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
7087 static void emitTargetTeamsDistributeParallelForSimdRegion(
7088 CodeGenFunction
&CGF
,
7089 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
,
7090 PrePostActionTy
&Action
) {
7092 auto &&CodeGenDistribute
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
7093 CGF
.EmitOMPDistributeLoop(S
, emitInnerParallelForWhenCombined
,
7097 // Emit teams region as a standalone region.
7098 auto &&CodeGenTeams
= [&S
, &CodeGenDistribute
](CodeGenFunction
&CGF
,
7099 PrePostActionTy
&Action
) {
7101 CodeGenFunction::OMPPrivateScope
PrivateScope(CGF
);
7102 CGF
.EmitOMPReductionClauseInit(S
, PrivateScope
);
7103 (void)PrivateScope
.Privatize();
7104 CGF
.CGM
.getOpenMPRuntime().emitInlinedDirective(
7105 CGF
, OMPD_distribute
, CodeGenDistribute
, /*HasCancel=*/false);
7106 CGF
.EmitOMPReductionClauseFinal(S
, /*ReductionKind=*/OMPD_teams
);
7109 emitCommonOMPTeamsDirective(CGF
, S
, OMPD_distribute_parallel_for_simd
,
7111 emitPostUpdateForReductionClause(CGF
, S
,
7112 [](CodeGenFunction
&) { return nullptr; });
7115 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7116 CodeGenModule
&CGM
, StringRef ParentName
,
7117 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
) {
7118 // Emit SPMD target teams distribute parallel for simd region as a standalone
7120 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7121 emitTargetTeamsDistributeParallelForSimdRegion(CGF
, S
, Action
);
7124 llvm::Constant
*Addr
;
7125 // Emit target region as a standalone region.
7126 CGM
.getOpenMPRuntime().emitTargetOutlinedFunction(
7127 S
, ParentName
, Fn
, Addr
, /*IsOffloadEntry=*/true, CodeGen
);
7128 assert(Fn
&& Addr
&& "Target device function emission failed.");
7131 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7132 const OMPTargetTeamsDistributeParallelForSimdDirective
&S
) {
7133 auto &&CodeGen
= [&S
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
7134 emitTargetTeamsDistributeParallelForSimdRegion(CGF
, S
, Action
);
7136 emitCommonOMPTargetDirective(*this, S
, CodeGen
);
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}
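
// Hypothetical usage (not from the original source) lowered by
// EmitOMPCancellationPointDirective to a runtime cancellation-point query for
// the enclosing 'for' region:
//
//   #pragma omp cancellation point for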
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}
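
// Assumed example (not from the original source) for EmitOMPCancelDirective;
// with the OpenMPIRBuilder enabled and a parallel cancel region, the cancel is
// emitted through OMPBuilder.createCancel, honoring the optional if clause:
//
//   #pragma omp cancel parallel if (err != 0)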
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
7189 void CodeGenFunction::EmitOMPUseDevicePtrClause(
7190 const OMPUseDevicePtrClause
&C
, OMPPrivateScope
&PrivateScope
,
7191 const llvm::DenseMap
<const ValueDecl
*, llvm::Value
*>
7192 CaptureDeviceAddrMap
) {
7193 llvm::SmallDenseSet
<CanonicalDeclPtr
<const Decl
>, 4> Processed
;
7194 for (const Expr
*OrigVarIt
: C
.varlists()) {
7195 const auto *OrigVD
= cast
<VarDecl
>(cast
<DeclRefExpr
>(OrigVarIt
)->getDecl());
7196 if (!Processed
.insert(OrigVD
).second
)
7199 // In order to identify the right initializer we need to match the
7200 // declaration used by the mapping logic. In some cases we may get
7201 // OMPCapturedExprDecl that refers to the original declaration.
7202 const ValueDecl
*MatchingVD
= OrigVD
;
7203 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(MatchingVD
)) {
7204 // OMPCapturedExprDecl are used to privative fields of the current
7206 const auto *ME
= cast
<MemberExpr
>(OED
->getInit());
7207 assert(isa
<CXXThisExpr
>(ME
->getBase()->IgnoreImpCasts()) &&
7208 "Base should be the current struct!");
7209 MatchingVD
= ME
->getMemberDecl();
7212 // If we don't have information about the current list item, move on to
7214 auto InitAddrIt
= CaptureDeviceAddrMap
.find(MatchingVD
);
7215 if (InitAddrIt
== CaptureDeviceAddrMap
.end())
7218 llvm::Type
*Ty
= ConvertTypeForMem(OrigVD
->getType().getNonReferenceType());
7220 // Return the address of the private variable.
7221 bool IsRegistered
= PrivateScope
.addPrivate(
7223 Address(InitAddrIt
->second
, Ty
,
7224 getContext().getTypeAlignInChars(getContext().VoidPtrTy
)));
7225 assert(IsRegistered
&& "firstprivate var already registered as private");
7226 // Silence the warning about unused variable.
static const VarDecl *getBaseDecl(const Expr *Ref) {
  const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
}
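
// Hypothetical clause (not from the original source) showing what getBaseDecl
// strips: for 'use_device_addr(a[0][2:n])' the array-section and subscript
// expressions are peeled away until the underlying VarDecl 'a' is found.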
7240 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7241 const OMPUseDeviceAddrClause
&C
, OMPPrivateScope
&PrivateScope
,
7242 const llvm::DenseMap
<const ValueDecl
*, llvm::Value
*>
7243 CaptureDeviceAddrMap
) {
7244 llvm::SmallDenseSet
<CanonicalDeclPtr
<const Decl
>, 4> Processed
;
7245 for (const Expr
*Ref
: C
.varlists()) {
7246 const VarDecl
*OrigVD
= getBaseDecl(Ref
);
7247 if (!Processed
.insert(OrigVD
).second
)
7249 // In order to identify the right initializer we need to match the
7250 // declaration used by the mapping logic. In some cases we may get
7251 // OMPCapturedExprDecl that refers to the original declaration.
7252 const ValueDecl
*MatchingVD
= OrigVD
;
7253 if (const auto *OED
= dyn_cast
<OMPCapturedExprDecl
>(MatchingVD
)) {
7254 // OMPCapturedExprDecl are used to privative fields of the current
7256 const auto *ME
= cast
<MemberExpr
>(OED
->getInit());
7257 assert(isa
<CXXThisExpr
>(ME
->getBase()) &&
7258 "Base should be the current struct!");
7259 MatchingVD
= ME
->getMemberDecl();
7262 // If we don't have information about the current list item, move on to
7264 auto InitAddrIt
= CaptureDeviceAddrMap
.find(MatchingVD
);
7265 if (InitAddrIt
== CaptureDeviceAddrMap
.end())
7268 llvm::Type
*Ty
= ConvertTypeForMem(OrigVD
->getType().getNonReferenceType());
7271 Address(InitAddrIt
->second
, Ty
,
7272 getContext().getTypeAlignInChars(getContext().VoidPtrTy
));
7273 // For declrefs and variable length array need to load the pointer for
7274 // correct mapping, since the pointer to the data was passed to the runtime.
7275 if (isa
<DeclRefExpr
>(Ref
->IgnoreParenImpCasts()) ||
7276 MatchingVD
->getType()->isArrayType()) {
7277 QualType PtrTy
= getContext().getPointerType(
7278 OrigVD
->getType().getNonReferenceType());
7280 EmitLoadOfPointer(PrivAddr
.withElementType(ConvertTypeForMem(PtrTy
)),
7281 PtrTy
->castAs
<PointerType
>());
7284 (void)PrivateScope
.addPrivate(OrigVD
, PrivAddr
);
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = cast<DeclRefExpr>(E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = getBaseDecl(E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope, because changes to the references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
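
// Editorial note (illustrative example, not part of the original sources):
// the directive handled above appears in user code such as
//   #pragma omp target data map(tofrom: a[0:n])
//   {
//     #pragma omp target
//     for (int i = 0; i < n; ++i) a[i] += 1;
//   }
// where the begin/end mapping calls are produced by emitTargetDataCalls().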
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
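
// Editorial note (illustrative only): a standalone mapping directive such as
//   #pragma omp target enter data map(to: a[0:n]) device(0)
// is emitted above as a single runtime call, guarded by the optional
// if/device clauses ('a' and 'n' are placeholder names).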
void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
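
// Editorial note (illustrative only):
//   #pragma omp target exit data map(from: a[0:n])
// releases the mapping created by a matching 'target enter data' and copies
// the data back ('a' and 'n' are placeholder names).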
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
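
// Editorial note (illustrative example, not from the original sources): the
// combined construct handled above,
//   #pragma omp target parallel firstprivate(x) reduction(+ : sum)
//   { sum += x; }
// is emitted as a target region whose outlined body runs a 'parallel' region
// ('x' and 'sum' are placeholder names).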
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
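
// Editorial note (illustrative only):
//   #pragma omp target parallel for reduction(+ : sum)
//   for (int i = 0; i < n; ++i) sum += a[i];
// is emitted as a target region containing a 'parallel' worksharing loop
// ('a', 'n' and 'sum' are placeholder names).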
static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
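
// Editorial note (illustrative only):
//   #pragma omp target parallel for simd
//   for (int i = 0; i < n; ++i) a[i] = b[i] + c[i];
// combines the target parallel worksharing loop above with simd lowering
// ('a', 'b', 'c' and 'n' are placeholder names).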
/// Map a loop helper variable to the address of the corresponding captured
/// parameter of the outlined function.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
}
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}
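
// Editorial note (illustrative example, not from the original sources): the
// taskloop family handled above covers user code such as
//   #pragma omp taskloop grainsize(64) nogroup
//   for (int i = 0; i < n; ++i) a[i] = 2 * a[i];
// where 'grainsize'/'num_tasks' fill Data.Schedule and 'nogroup' skips the
// implicit taskgroup around the generated tasks ('a', 'n' are placeholders).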
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}
void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}
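
// Editorial note (illustrative only):
//   #pragma omp parallel master taskloop simd num_tasks(8)
//   for (int i = 0; i < n; ++i) a[i] += 1;
// is emitted above as a 'parallel' region whose master thread emits the
// taskloop ('a' and 'n' are placeholder names).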
// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
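
// Editorial note (illustrative only):
//   #pragma omp target update from(a[0:n]) if(n > 0)
// copies the device data back to the host copy of 'a' ('a' and 'n' are
// placeholder names).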
void CodeGenFunction::EmitOMPGenericLoopDirective(
    const OMPGenericLoopDirective &S) {
  // Unimplemented, just inline the underlying statement for now.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the loop iteration variable.
    const Stmt *CS =
        cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(CS);
    if (ForS && !isa<DeclStmt>(ForS->getInit())) {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitStmt(CS);
      LoopScope.restoreMap();
    } else {
      CGF.EmitStmt(CS);
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
}
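
// Editorial note (illustrative only): until 'loop' gets dedicated codegen, a
// directive such as
//   #pragma omp loop
//   for (int i = 0; i < n; ++i) a[i] = i;
// is simply emitted inline by the code above ('a' and 'n' are placeholders).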
void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit combined directive as if its constituent constructs are 'parallel'
  // and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
static void
emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
                                 const OMPTargetTeamsGenericLoopDirective &S,
                                 PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
/// Emit combined directive 'target teams loop' as if its constituent
/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
    const OMPTargetTeamsGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsGenericLoopRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
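
// Editorial note (illustrative only):
//   #pragma omp target teams loop reduction(+ : sum)
//   for (int i = 0; i < n; ++i) sum += a[i];
// is emitted as 'target teams distribute parallel for' per the mapping in the
// doc comment above ('a', 'n' and 'sum' are placeholder names).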
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Emit SPMD target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsGenericLoopRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr &&
         "Target device function emission failed for 'target teams loop'.");
}
static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelGenericLoopDirective &S) {
  // Emit target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
/// Emit combined directive 'target parallel loop' as if its constituent
/// constructs are 'target', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
    const OMPTargetParallelGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
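
// Editorial note (illustrative only):
//   #pragma omp target parallel loop
//   for (int i = 0; i < n; ++i) a[i] *= 2;
// is emitted as 'target parallel for' per the mapping in the doc comment
// above ('a' and 'n' are placeholder names).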
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked ||
      D.getDirectiveKind() == OMPD_unroll) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}