1 //===-- runtime/reduction-templates.h -------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Generic function templates used by various reduction transformation
10 // intrinsic functions (SUM, PRODUCT, &c.)
12 // * Partial reductions (i.e., those with DIM= arguments that are not
13 // required to be 1 by the rank of the argument) return arrays that
14 // are dynamically allocated in a caller-supplied descriptor.
15 // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
16 // return integer vectors of some kind, not scalars; a caller-supplied
18 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
19 // length results, dynamically allocated in a caller-supplied descriptor
21 #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
22 #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
24 #include "terminator.h"
26 #include "flang/Runtime/cpp-type.h"
27 #include "flang/Runtime/descriptor.h"
29 namespace Fortran::runtime
{
31 // Reductions are implemented with *accumulators*, which are instances of
32 // classes that incrementally build up the result (or an element thereof) during
33 // a traversal of the unmasked elements of an array. Each accumulator class
34 // supports a constructor (which captures a reference to the array), an
35 // AccumulateAt() member function that applies supplied subscripts to the
36 // array and does something with a scalar element, and a GetResult()
37 // member function that copies a final result into its destination.
39 // Total reduction of the array argument to a scalar (or to a vector in the
40 // cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or
41 // cases where the argument has rank 1 and DIM=, if present, must be 1.
42 template <typename TYPE
, typename ACCUMULATOR
>
43 inline void DoTotalReduction(const Descriptor
&x
, int dim
,
44 const Descriptor
*mask
, ACCUMULATOR
&accumulator
, const char *intrinsic
,
45 Terminator
&terminator
) {
46 if (dim
< 0 || dim
> 1) {
47 terminator
.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
48 intrinsic
, dim
, x
.rank());
50 SubscriptValue xAt
[maxRank
];
51 x
.GetLowerBounds(xAt
);
53 CheckConformability(x
, *mask
, terminator
, intrinsic
, "ARRAY", "MASK");
54 SubscriptValue maskAt
[maxRank
];
55 mask
->GetLowerBounds(maskAt
);
56 if (mask
->rank() > 0) {
57 for (auto elements
{x
.Elements()}; elements
--;
58 x
.IncrementSubscripts(xAt
), mask
->IncrementSubscripts(maskAt
)) {
59 if (IsLogicalElementTrue(*mask
, maskAt
)) {
60 if (!accumulator
.template AccumulateAt
<TYPE
>(xAt
))
65 } else if (!IsLogicalElementTrue(*mask
, maskAt
)) {
66 // scalar MASK=.FALSE.: return identity value
70 // No MASK=, or scalar MASK=.TRUE.
71 for (auto elements
{x
.Elements()}; elements
--; x
.IncrementSubscripts(xAt
)) {
72 if (!accumulator
.template AccumulateAt
<TYPE
>(xAt
)) {
73 break; // cut short, result is known
78 template <TypeCategory CAT
, int KIND
, typename ACCUMULATOR
>
79 inline CppTypeFor
<CAT
, KIND
> GetTotalReduction(const Descriptor
&x
,
80 const char *source
, int line
, int dim
, const Descriptor
*mask
,
81 ACCUMULATOR
&&accumulator
, const char *intrinsic
) {
82 Terminator terminator
{source
, line
};
83 RUNTIME_CHECK(terminator
, TypeCode(CAT
, KIND
) == x
.type());
84 using CppType
= CppTypeFor
<CAT
, KIND
>;
85 DoTotalReduction
<CppType
>(x
, dim
, mask
, accumulator
, intrinsic
, terminator
);
87 #ifdef _MSC_VER // work around MSVC spurious error
88 accumulator
.GetResult(&result
);
90 accumulator
.template GetResult(&result
);
95 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
96 // of the array is [2,3,5], the shape of the result is [2,5] and
97 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
98 // lower bounds other than one. This utility subroutine creates an
99 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
100 // elements of array(j,:,k) can be reduced.
101 inline void GetExpandedSubscripts(SubscriptValue at
[],
102 const Descriptor
&descriptor
, int zeroBasedDim
,
103 const SubscriptValue from
[]) {
104 descriptor
.GetLowerBounds(at
);
105 int rank
{descriptor
.rank()};
107 for (; j
< zeroBasedDim
; ++j
) {
108 at
[j
] += from
[j
] - 1 /*lower bound*/;
110 for (++j
; j
< rank
; ++j
) {
111 at
[j
] += from
[j
- 1] - 1;
115 template <typename TYPE
, typename ACCUMULATOR
>
116 inline void ReduceDimToScalar(const Descriptor
&x
, int zeroBasedDim
,
117 SubscriptValue subscripts
[], TYPE
*result
, ACCUMULATOR
&accumulator
) {
118 SubscriptValue xAt
[maxRank
];
119 GetExpandedSubscripts(xAt
, x
, zeroBasedDim
, subscripts
);
120 const auto &dim
{x
.GetDimension(zeroBasedDim
)};
121 SubscriptValue at
{dim
.LowerBound()};
122 for (auto n
{dim
.Extent()}; n
-- > 0; ++at
) {
123 xAt
[zeroBasedDim
] = at
;
124 if (!accumulator
.template AccumulateAt
<TYPE
>(xAt
)) {
128 #ifdef _MSC_VER // work around MSVC spurious error
129 accumulator
.GetResult(result
, zeroBasedDim
);
131 accumulator
.template GetResult(result
, zeroBasedDim
);
135 template <typename TYPE
, typename ACCUMULATOR
>
136 inline void ReduceDimMaskToScalar(const Descriptor
&x
, int zeroBasedDim
,
137 SubscriptValue subscripts
[], const Descriptor
&mask
, TYPE
*result
,
138 ACCUMULATOR
&accumulator
) {
139 SubscriptValue xAt
[maxRank
], maskAt
[maxRank
];
140 GetExpandedSubscripts(xAt
, x
, zeroBasedDim
, subscripts
);
141 GetExpandedSubscripts(maskAt
, mask
, zeroBasedDim
, subscripts
);
142 const auto &xDim
{x
.GetDimension(zeroBasedDim
)};
143 SubscriptValue xPos
{xDim
.LowerBound()};
144 const auto &maskDim
{mask
.GetDimension(zeroBasedDim
)};
145 SubscriptValue maskPos
{maskDim
.LowerBound()};
146 for (auto n
{x
.GetDimension(zeroBasedDim
).Extent()}; n
-- > 0;
148 maskAt
[zeroBasedDim
] = maskPos
;
149 if (IsLogicalElementTrue(mask
, maskAt
)) {
150 xAt
[zeroBasedDim
] = xPos
;
151 if (!accumulator
.template AccumulateAt
<TYPE
>(xAt
)) {
156 #ifdef _MSC_VER // work around MSVC spurious error
157 accumulator
.GetResult(result
, zeroBasedDim
);
159 accumulator
.template GetResult(result
, zeroBasedDim
);
163 // Utility: establishes & allocates the result array for a partial
164 // reduction (i.e., one with DIM=).
165 static void CreatePartialReductionResult(Descriptor
&result
,
166 const Descriptor
&x
, std::size_t resultElementSize
, int dim
,
167 Terminator
&terminator
, const char *intrinsic
, TypeCode typeCode
) {
169 if (dim
< 1 || dim
> xRank
) {
171 "%s: bad DIM=%d for ARRAY with rank %d", intrinsic
, dim
, xRank
);
173 int zeroBasedDim
{dim
- 1};
174 SubscriptValue resultExtent
[maxRank
];
175 for (int j
{0}; j
< zeroBasedDim
; ++j
) {
176 resultExtent
[j
] = x
.GetDimension(j
).Extent();
178 for (int j
{zeroBasedDim
+ 1}; j
< xRank
; ++j
) {
179 resultExtent
[j
- 1] = x
.GetDimension(j
).Extent();
181 result
.Establish(typeCode
, resultElementSize
, nullptr, xRank
- 1,
182 resultExtent
, CFI_attribute_allocatable
);
183 for (int j
{0}; j
+ 1 < xRank
; ++j
) {
184 result
.GetDimension(j
).SetBounds(1, resultExtent
[j
]);
186 if (int stat
{result
.Allocate()}) {
188 "%s: could not allocate memory for result; STAT=%d", intrinsic
, stat
);
192 // Partial reductions with DIM=
194 template <typename ACCUMULATOR
, TypeCategory CAT
, int KIND
>
195 inline void PartialReduction(Descriptor
&result
, const Descriptor
&x
,
196 std::size_t resultElementSize
, int dim
, const Descriptor
*mask
,
197 Terminator
&terminator
, const char *intrinsic
, ACCUMULATOR
&accumulator
) {
198 CreatePartialReductionResult(result
, x
, resultElementSize
, dim
, terminator
,
199 intrinsic
, TypeCode
{CAT
, KIND
});
200 SubscriptValue at
[maxRank
];
201 result
.GetLowerBounds(at
);
202 INTERNAL_CHECK(result
.rank() == 0 || at
[0] == 1);
203 using CppType
= CppTypeFor
<CAT
, KIND
>;
205 CheckConformability(x
, *mask
, terminator
, intrinsic
, "ARRAY", "MASK");
206 SubscriptValue maskAt
[maxRank
]; // contents unused
207 if (mask
->rank() > 0) {
208 for (auto n
{result
.Elements()}; n
-- > 0; result
.IncrementSubscripts(at
)) {
209 accumulator
.Reinitialize();
210 ReduceDimMaskToScalar
<CppType
, ACCUMULATOR
>(
211 x
, dim
- 1, at
, *mask
, result
.Element
<CppType
>(at
), accumulator
);
214 } else if (!IsLogicalElementTrue(*mask
, maskAt
)) {
215 // scalar MASK=.FALSE.
216 accumulator
.Reinitialize();
217 for (auto n
{result
.Elements()}; n
-- > 0; result
.IncrementSubscripts(at
)) {
218 accumulator
.GetResult(result
.Element
<CppType
>(at
));
223 // No MASK= or scalar MASK=.TRUE.
224 for (auto n
{result
.Elements()}; n
-- > 0; result
.IncrementSubscripts(at
)) {
225 accumulator
.Reinitialize();
226 ReduceDimToScalar
<CppType
, ACCUMULATOR
>(
227 x
, dim
- 1, at
, result
.Element
<CppType
>(at
), accumulator
);
231 template <template <typename
> class ACCUM
>
232 struct PartialIntegerReductionHelper
{
233 template <int KIND
> struct Functor
{
234 static constexpr int Intermediate
{
235 std::max(KIND
, 4)}; // use at least "int" for intermediate results
236 void operator()(Descriptor
&result
, const Descriptor
&x
, int dim
,
237 const Descriptor
*mask
, Terminator
&terminator
,
238 const char *intrinsic
) const {
240 ACCUM
<CppTypeFor
<TypeCategory::Integer
, Intermediate
>>;
241 Accumulator accumulator
{x
};
242 // Element size of the destination descriptor is the same
243 // as the element size of the source.
244 PartialReduction
<Accumulator
, TypeCategory::Integer
, KIND
>(result
, x
,
245 x
.ElementBytes(), dim
, mask
, terminator
, intrinsic
, accumulator
);
250 template <template <typename
> class INTEGER_ACCUM
>
251 inline void PartialIntegerReduction(Descriptor
&result
, const Descriptor
&x
,
252 int dim
, int kind
, const Descriptor
*mask
, const char *intrinsic
,
253 Terminator
&terminator
) {
255 PartialIntegerReductionHelper
<INTEGER_ACCUM
>::template Functor
, void>(
256 kind
, terminator
, result
, x
, dim
, mask
, terminator
, intrinsic
);
259 template <TypeCategory CAT
, template <typename
> class ACCUM
>
260 struct PartialFloatingReductionHelper
{
261 template <int KIND
> struct Functor
{
262 static constexpr int Intermediate
{
263 std::max(KIND
, 8)}; // use at least "double" for intermediate results
264 void operator()(Descriptor
&result
, const Descriptor
&x
, int dim
,
265 const Descriptor
*mask
, Terminator
&terminator
,
266 const char *intrinsic
) const {
267 using Accumulator
= ACCUM
<CppTypeFor
<TypeCategory::Real
, Intermediate
>>;
268 Accumulator accumulator
{x
};
269 // Element size of the destination descriptor is the same
270 // as the element size of the source.
271 PartialReduction
<Accumulator
, CAT
, KIND
>(result
, x
, x
.ElementBytes(), dim
,
272 mask
, terminator
, intrinsic
, accumulator
);
277 template <template <typename
> class INTEGER_ACCUM
,
278 template <typename
> class REAL_ACCUM
,
279 template <typename
> class COMPLEX_ACCUM
>
280 inline void TypedPartialNumericReduction(Descriptor
&result
,
281 const Descriptor
&x
, int dim
, const char *source
, int line
,
282 const Descriptor
*mask
, const char *intrinsic
) {
283 Terminator terminator
{source
, line
};
284 auto catKind
{x
.type().GetCategoryAndKind()};
285 RUNTIME_CHECK(terminator
, catKind
.has_value());
286 switch (catKind
->first
) {
287 case TypeCategory::Integer
:
288 PartialIntegerReduction
<INTEGER_ACCUM
>(
289 result
, x
, dim
, catKind
->second
, mask
, intrinsic
, terminator
);
291 case TypeCategory::Real
:
292 ApplyFloatingPointKind
<PartialFloatingReductionHelper
<TypeCategory::Real
,
293 REAL_ACCUM
>::template Functor
,
294 void>(catKind
->second
, terminator
, result
, x
, dim
, mask
, terminator
,
297 case TypeCategory::Complex
:
298 ApplyFloatingPointKind
<PartialFloatingReductionHelper
<TypeCategory::Complex
,
299 COMPLEX_ACCUM
>::template Functor
,
300 void>(catKind
->second
, terminator
, result
, x
, dim
, mask
, terminator
,
304 terminator
.Crash("%s: bad type code %d", intrinsic
, x
.type().raw());
308 template <typename ACCUMULATOR
> struct LocationResultHelper
{
309 template <int KIND
> struct Functor
{
310 void operator()(ACCUMULATOR
&accumulator
, const Descriptor
&result
) const {
311 accumulator
.GetResult(
312 result
.OffsetElement
<CppTypeFor
<TypeCategory::Integer
, KIND
>>());
317 template <typename ACCUMULATOR
> struct PartialLocationHelper
{
318 template <int KIND
> struct Functor
{
319 void operator()(Descriptor
&result
, const Descriptor
&x
, int dim
,
320 const Descriptor
*mask
, Terminator
&terminator
, const char *intrinsic
,
321 ACCUMULATOR
&accumulator
) const {
322 // Element size of the destination descriptor is the size
323 // of {TypeCategory::Integer, KIND}.
324 PartialReduction
<ACCUMULATOR
, TypeCategory::Integer
, KIND
>(result
, x
,
325 Descriptor::BytesFor(TypeCategory::Integer
, KIND
), dim
, mask
,
326 terminator
, intrinsic
, accumulator
);
331 } // namespace Fortran::runtime
332 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_