1 //===-- runtime/reduction-templates.h ---------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Generic function templates used by various reduction transformation
10 // intrinsic functions (SUM, PRODUCT, &c.)
12 // * Partial reductions (i.e., those with DIM= arguments that are not
13 // required to be 1 by the rank of the argument) return arrays that
14 // are dynamically allocated in a caller-supplied descriptor.
15 // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
16 // return integer vectors of some kind, not scalars; a caller-supplied
18 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
19 // length results, dynamically allocated in a caller-supplied descriptor
21 #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
22 #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
24 #include "numeric-templates.h"
25 #include "terminator.h"
27 #include "flang/Runtime/cpp-type.h"
28 #include "flang/Runtime/descriptor.h"
31 namespace Fortran::runtime
{
33 // Reductions are implemented with *accumulators*, which are instances of
34 // classes that incrementally build up the result (or an element thereof) during
35 // a traversal of the unmasked elements of an array. Each accumulator class
36 // supports a constructor (which captures a reference to the array), an
37 // AccumulateAt() member function that applies supplied subscripts to the
38 // array and does something with a scalar element, and a GetResult()
39 // member function that copies a final result into its destination.
41 // Total reduction of the array argument to a scalar (or to a vector in the
42 // cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or
43 // cases where the argument has rank 1 and DIM=, if present, must be 1.
44 template <typename TYPE
, typename ACCUMULATOR
>
45 inline RT_API_ATTRS
void DoTotalReduction(const Descriptor
&x
, int dim
,
46 const Descriptor
*mask
, ACCUMULATOR
&accumulator
, const char *intrinsic
,
47 Terminator
&terminator
) {
48 if (dim
< 0 || dim
> 1) {
49 terminator
.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
50 intrinsic
, dim
, x
.rank());
52 SubscriptValue xAt
[maxRank
];
53 x
.GetLowerBounds(xAt
);
55 CheckConformability(x
, *mask
, terminator
, intrinsic
, "ARRAY", "MASK");
56 if (mask
->rank() > 0) {
57 SubscriptValue maskAt
[maxRank
];
58 mask
->GetLowerBounds(maskAt
);
59 for (auto elements
{x
.Elements()}; elements
--;
60 x
.IncrementSubscripts(xAt
), mask
->IncrementSubscripts(maskAt
)) {
61 if (IsLogicalElementTrue(*mask
, maskAt
)) {
62 if (!accumulator
.template AccumulateAt
<TYPE
>(xAt
)) {
68 } else if (!IsLogicalScalarTrue(*mask
)) {
69 // scalar MASK=.FALSE.: return identity value
73 // No MASK=, or scalar MASK=.TRUE.
74 for (auto elements
{x
.Elements()}; elements
--; x
.IncrementSubscripts(xAt
)) {
75 if (!accumulator
.template AccumulateAt
<TYPE
>(xAt
)) {
76 break; // cut short, result is known
81 template <TypeCategory CAT
, int KIND
, typename ACCUMULATOR
>
82 inline RT_API_ATTRS CppTypeFor
<CAT
, KIND
> GetTotalReduction(const Descriptor
&x
,
83 const char *source
, int line
, int dim
, const Descriptor
*mask
,
84 ACCUMULATOR
&&accumulator
, const char *intrinsic
) {
85 Terminator terminator
{source
, line
};
86 RUNTIME_CHECK(terminator
, TypeCode(CAT
, KIND
) == x
.type());
87 using CppType
= CppTypeFor
<CAT
, KIND
>;
88 DoTotalReduction
<CppType
>(x
, dim
, mask
, accumulator
, intrinsic
, terminator
);
89 if constexpr (std::is_void_v
<CppType
>) {
90 // Result is returned from accumulator, as in REDUCE() for derived type
91 #ifdef _MSC_VER // work around MSVC spurious error
92 accumulator
.GetResult();
94 accumulator
.template GetResult
<CppType
>();
98 #ifdef _MSC_VER // work around MSVC spurious error
99 accumulator
.GetResult(&result
);
101 accumulator
.template GetResult
<CppType
>(&result
);
107 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
108 // of the array is [2,3,5], the shape of the result is [2,5] and
109 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
110 // lower bounds other than one. This utility subroutine creates an
111 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
112 // elements of array(j,:,k) can be reduced.
113 inline RT_API_ATTRS
void GetExpandedSubscripts(SubscriptValue at
[],
114 const Descriptor
&descriptor
, int zeroBasedDim
,
115 const SubscriptValue from
[]) {
116 descriptor
.GetLowerBounds(at
);
117 int rank
{descriptor
.rank()};
119 for (; j
< zeroBasedDim
; ++j
) {
120 at
[j
] += from
[j
] - 1 /*lower bound*/;
122 for (++j
; j
< rank
; ++j
) {
123 at
[j
] += from
[j
- 1] - 1;
127 template <typename TYPE
, typename ACCUMULATOR
>
128 inline RT_API_ATTRS
void ReduceDimToScalar(const Descriptor
&x
,
129 int zeroBasedDim
, SubscriptValue subscripts
[], TYPE
*result
,
130 ACCUMULATOR
&accumulator
) {
131 SubscriptValue xAt
[maxRank
];
132 GetExpandedSubscripts(xAt
, x
, zeroBasedDim
, subscripts
);
133 const auto &dim
{x
.GetDimension(zeroBasedDim
)};
134 SubscriptValue at
{dim
.LowerBound()};
135 for (auto n
{dim
.Extent()}; n
-- > 0; ++at
) {
136 xAt
[zeroBasedDim
] = at
;
137 if (!accumulator
.template AccumulateAt
<TYPE
>(xAt
)) {
141 #ifdef _MSC_VER // work around MSVC spurious error
142 accumulator
.GetResult(result
, zeroBasedDim
);
144 accumulator
.template GetResult
<TYPE
>(result
, zeroBasedDim
);
148 template <typename TYPE
, typename ACCUMULATOR
>
149 inline RT_API_ATTRS
void ReduceDimMaskToScalar(const Descriptor
&x
,
150 int zeroBasedDim
, SubscriptValue subscripts
[], const Descriptor
&mask
,
151 TYPE
*result
, ACCUMULATOR
&accumulator
) {
152 SubscriptValue xAt
[maxRank
], maskAt
[maxRank
];
153 GetExpandedSubscripts(xAt
, x
, zeroBasedDim
, subscripts
);
154 GetExpandedSubscripts(maskAt
, mask
, zeroBasedDim
, subscripts
);
155 const auto &xDim
{x
.GetDimension(zeroBasedDim
)};
156 SubscriptValue xPos
{xDim
.LowerBound()};
157 const auto &maskDim
{mask
.GetDimension(zeroBasedDim
)};
158 SubscriptValue maskPos
{maskDim
.LowerBound()};
159 for (auto n
{x
.GetDimension(zeroBasedDim
).Extent()}; n
-- > 0;
161 maskAt
[zeroBasedDim
] = maskPos
;
162 if (IsLogicalElementTrue(mask
, maskAt
)) {
163 xAt
[zeroBasedDim
] = xPos
;
164 if (!accumulator
.template AccumulateAt
<TYPE
>(xAt
)) {
169 #ifdef _MSC_VER // work around MSVC spurious error
170 accumulator
.GetResult(result
, zeroBasedDim
);
172 accumulator
.template GetResult
<TYPE
>(result
, zeroBasedDim
);
176 // Partial reductions with DIM=
178 template <typename ACCUMULATOR
, TypeCategory CAT
, int KIND
>
179 inline RT_API_ATTRS
void PartialReduction(Descriptor
&result
,
180 const Descriptor
&x
, std::size_t resultElementSize
, int dim
,
181 const Descriptor
*mask
, Terminator
&terminator
, const char *intrinsic
,
182 ACCUMULATOR
&accumulator
) {
183 CreatePartialReductionResult(result
, x
, resultElementSize
, dim
, terminator
,
184 intrinsic
, TypeCode
{CAT
, KIND
});
185 SubscriptValue at
[maxRank
];
186 result
.GetLowerBounds(at
);
187 INTERNAL_CHECK(result
.rank() == 0 || at
[0] == 1);
188 using CppType
= CppTypeFor
<CAT
, KIND
>;
190 CheckConformability(x
, *mask
, terminator
, intrinsic
, "ARRAY", "MASK");
191 if (mask
->rank() > 0) {
192 for (auto n
{result
.Elements()}; n
-- > 0; result
.IncrementSubscripts(at
)) {
193 accumulator
.Reinitialize();
194 ReduceDimMaskToScalar
<CppType
, ACCUMULATOR
>(
195 x
, dim
- 1, at
, *mask
, result
.Element
<CppType
>(at
), accumulator
);
198 } else if (!IsLogicalScalarTrue(*mask
)) {
199 // scalar MASK=.FALSE.
200 accumulator
.Reinitialize();
201 for (auto n
{result
.Elements()}; n
-- > 0; result
.IncrementSubscripts(at
)) {
202 accumulator
.GetResult(result
.Element
<CppType
>(at
));
207 // No MASK= or scalar MASK=.TRUE.
208 for (auto n
{result
.Elements()}; n
-- > 0; result
.IncrementSubscripts(at
)) {
209 accumulator
.Reinitialize();
210 ReduceDimToScalar
<CppType
, ACCUMULATOR
>(
211 x
, dim
- 1, at
, result
.Element
<CppType
>(at
), accumulator
);
215 template <template <typename
> class ACCUM
>
216 struct PartialIntegerReductionHelper
{
217 template <int KIND
> struct Functor
{
218 static constexpr int Intermediate
{
219 std::max(KIND
, 4)}; // use at least "int" for intermediate results
220 RT_API_ATTRS
void operator()(Descriptor
&result
, const Descriptor
&x
,
221 int dim
, const Descriptor
*mask
, Terminator
&terminator
,
222 const char *intrinsic
) const {
224 ACCUM
<CppTypeFor
<TypeCategory::Integer
, Intermediate
>>;
225 Accumulator accumulator
{x
};
226 // Element size of the destination descriptor is the same
227 // as the element size of the source.
228 PartialReduction
<Accumulator
, TypeCategory::Integer
, KIND
>(result
, x
,
229 x
.ElementBytes(), dim
, mask
, terminator
, intrinsic
, accumulator
);
234 template <template <typename
> class INTEGER_ACCUM
>
235 inline RT_API_ATTRS
void PartialIntegerReduction(Descriptor
&result
,
236 const Descriptor
&x
, int dim
, int kind
, const Descriptor
*mask
,
237 const char *intrinsic
, Terminator
&terminator
) {
239 PartialIntegerReductionHelper
<INTEGER_ACCUM
>::template Functor
, void>(
240 kind
, terminator
, result
, x
, dim
, mask
, terminator
, intrinsic
);
243 template <TypeCategory CAT
, template <typename
> class ACCUM
, int MIN_KIND
>
244 struct PartialFloatingReductionHelper
{
245 template <int KIND
> struct Functor
{
246 static constexpr int Intermediate
{std::max(KIND
, MIN_KIND
)};
247 RT_API_ATTRS
void operator()(Descriptor
&result
, const Descriptor
&x
,
248 int dim
, const Descriptor
*mask
, Terminator
&terminator
,
249 const char *intrinsic
) const {
250 using Accumulator
= ACCUM
<CppTypeFor
<TypeCategory::Real
, Intermediate
>>;
251 Accumulator accumulator
{x
};
252 // Element size of the destination descriptor is the same
253 // as the element size of the source.
254 PartialReduction
<Accumulator
, CAT
, KIND
>(result
, x
, x
.ElementBytes(), dim
,
255 mask
, terminator
, intrinsic
, accumulator
);
260 template <template <typename
> class INTEGER_ACCUM
,
261 template <typename
> class REAL_ACCUM
,
262 template <typename
> class COMPLEX_ACCUM
, int MIN_REAL_KIND
>
263 inline RT_API_ATTRS
void TypedPartialNumericReduction(Descriptor
&result
,
264 const Descriptor
&x
, int dim
, const char *source
, int line
,
265 const Descriptor
*mask
, const char *intrinsic
) {
266 Terminator terminator
{source
, line
};
267 auto catKind
{x
.type().GetCategoryAndKind()};
268 RUNTIME_CHECK(terminator
, catKind
.has_value());
269 switch (catKind
->first
) {
270 case TypeCategory::Integer
:
271 PartialIntegerReduction
<INTEGER_ACCUM
>(
272 result
, x
, dim
, catKind
->second
, mask
, intrinsic
, terminator
);
274 case TypeCategory::Real
:
275 ApplyFloatingPointKind
<PartialFloatingReductionHelper
<TypeCategory::Real
,
276 REAL_ACCUM
, MIN_REAL_KIND
>::template Functor
,
277 void>(catKind
->second
, terminator
, result
, x
, dim
, mask
, terminator
,
280 case TypeCategory::Complex
:
281 ApplyFloatingPointKind
<PartialFloatingReductionHelper
<TypeCategory::Complex
,
282 COMPLEX_ACCUM
, MIN_REAL_KIND
>::template Functor
,
283 void>(catKind
->second
, terminator
, result
, x
, dim
, mask
, terminator
,
287 terminator
.Crash("%s: bad type code %d", intrinsic
, x
.type().raw());
291 template <typename ACCUMULATOR
> struct LocationResultHelper
{
292 template <int KIND
> struct Functor
{
293 RT_API_ATTRS
void operator()(
294 ACCUMULATOR
&accumulator
, const Descriptor
&result
) const {
295 accumulator
.GetResult(
296 result
.OffsetElement
<CppTypeFor
<TypeCategory::Integer
, KIND
>>());
301 template <typename ACCUMULATOR
> struct PartialLocationHelper
{
302 template <int KIND
> struct Functor
{
303 RT_API_ATTRS
void operator()(Descriptor
&result
, const Descriptor
&x
,
304 int dim
, const Descriptor
*mask
, Terminator
&terminator
,
305 const char *intrinsic
, ACCUMULATOR
&accumulator
) const {
306 // Element size of the destination descriptor is the size
307 // of {TypeCategory::Integer, KIND}.
308 PartialReduction
<ACCUMULATOR
, TypeCategory::Integer
, KIND
>(result
, x
,
309 Descriptor::BytesFor(TypeCategory::Integer
, KIND
), dim
, mask
,
310 terminator
, intrinsic
, accumulator
);
319 // Use at least double precision for accumulators.
320 // Don't use __float128, it doesn't work with abs() or sqrt() yet.
321 static constexpr RT_CONST_VAR_ATTRS
int Norm2LargestLDKind
{
322 #if HAS_LDBL128 || HAS_FLOAT128
333 template <TypeCategory CAT
, int KIND
, typename ACCUMULATOR
>
334 inline RT_API_ATTRS
void DoMaxMinNorm2(Descriptor
&result
, const Descriptor
&x
,
335 int dim
, const Descriptor
*mask
, const char *intrinsic
,
336 Terminator
&terminator
) {
337 using Type
= CppTypeFor
<CAT
, KIND
>;
338 ACCUMULATOR accumulator
{x
};
339 if (dim
== 0 || x
.rank() == 1) {
342 // Element size of the destination descriptor is the same
343 // as the element size of the source.
344 result
.Establish(x
.type(), x
.ElementBytes(), nullptr, 0, nullptr,
345 CFI_attribute_allocatable
);
346 if (int stat
{result
.Allocate()}) {
348 "%s: could not allocate memory for result; STAT=%d", intrinsic
, stat
);
350 DoTotalReduction
<Type
>(x
, dim
, mask
, accumulator
, intrinsic
, terminator
);
351 accumulator
.GetResult(result
.OffsetElement
<Type
>());
355 // Element size of the destination descriptor is the same
356 // as the element size of the source.
357 PartialReduction
<ACCUMULATOR
, CAT
, KIND
>(result
, x
, x
.ElementBytes(), dim
,
358 mask
, terminator
, intrinsic
, accumulator
);
362 // The data type used by Norm2Accumulator.
364 using Norm2AccumType
=
365 CppTypeFor
<TypeCategory::Real
, std::clamp(KIND
, 8, Norm2LargestLDKind
)>;
367 template <int KIND
> class Norm2Accumulator
{
369 using Type
= CppTypeFor
<TypeCategory::Real
, KIND
>;
370 using AccumType
= Norm2AccumType
<KIND
>;
371 explicit RT_API_ATTRS
Norm2Accumulator(const Descriptor
&array
)
373 RT_API_ATTRS
void Reinitialize() { max_
= sum_
= 0; }
374 template <typename A
>
375 RT_API_ATTRS
void GetResult(A
*p
, int /*zeroBasedDim*/ = -1) const {
376 // m * sqrt(1 + sum((others(:)/m)**2))
377 *p
= static_cast<Type
>(max_
* SQRTTy
<AccumType
>::compute(1 + sum_
));
379 RT_API_ATTRS
bool Accumulate(Type x
) {
380 auto absX
{ABSTy
<AccumType
>::compute(static_cast<AccumType
>(x
))};
383 } else if (absX
> max_
) {
384 auto t
{max_
/ absX
}; // < 1.0
386 sum_
*= tsq
; // scale sum to reflect change to the max
387 sum_
+= tsq
; // include a term for the previous max
389 } else { // absX <= max_
395 template <typename A
>
396 RT_API_ATTRS
bool AccumulateAt(const SubscriptValue at
[]) {
397 return Accumulate(*array_
.Element
<A
>(at
));
401 const Descriptor
&array_
;
402 AccumType max_
{0}; // value (m) with largest magnitude
403 AccumType sum_
{0}; // sum((others(:)/m)**2)
406 template <int KIND
> struct Norm2Helper
{
407 RT_API_ATTRS
void operator()(Descriptor
&result
, const Descriptor
&x
, int dim
,
408 const Descriptor
*mask
, Terminator
&terminator
) const {
409 DoMaxMinNorm2
<TypeCategory::Real
, KIND
, Norm2Accumulator
<KIND
>>(
410 result
, x
, dim
, mask
, "NORM2", terminator
);
414 } // namespace Fortran::runtime
415 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_