[clangd] Fix erroneous qualification of template type parameters (#116821)
[llvm-project.git] / flang / runtime / reduction-templates.h
blob2a85595bad7dbd1ec93d9acff20d785e14af21e3
1 //===-- runtime/reduction-templates.h ---------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 // Generic function templates used by various reduction transformation
10 // intrinsic functions (SUM, PRODUCT, &c.)
12 // * Partial reductions (i.e., those with DIM= arguments that are not
13 // required to be 1 by the rank of the argument) return arrays that
14 // are dynamically allocated in a caller-supplied descriptor.
15 // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
16 // return integer vectors of some kind, not scalars; a caller-supplied
17 // descriptor is used
18 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
19 // length results, dynamically allocated in a caller-supplied descriptor
21 #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
22 #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
24 #include "numeric-templates.h"
25 #include "terminator.h"
26 #include "tools.h"
27 #include "flang/Runtime/cpp-type.h"
28 #include "flang/Runtime/descriptor.h"
29 #include <algorithm>
31 namespace Fortran::runtime {
33 // Reductions are implemented with *accumulators*, which are instances of
34 // classes that incrementally build up the result (or an element thereof) during
35 // a traversal of the unmasked elements of an array. Each accumulator class
36 // supports a constructor (which captures a reference to the array), an
37 // AccumulateAt() member function that applies supplied subscripts to the
38 // array and does something with a scalar element, and a GetResult()
39 // member function that copies a final result into its destination.
41 // Total reduction of the array argument to a scalar (or to a vector in the
42 // cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or
43 // cases where the argument has rank 1 and DIM=, if present, must be 1.
44 template <typename TYPE, typename ACCUMULATOR>
45 inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim,
46 const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
47 Terminator &terminator) {
48 if (dim < 0 || dim > 1) {
49 terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
50 intrinsic, dim, x.rank());
52 SubscriptValue xAt[maxRank];
53 x.GetLowerBounds(xAt);
54 if (mask) {
55 CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
56 if (mask->rank() > 0) {
57 SubscriptValue maskAt[maxRank];
58 mask->GetLowerBounds(maskAt);
59 for (auto elements{x.Elements()}; elements--;
60 x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
61 if (IsLogicalElementTrue(*mask, maskAt)) {
62 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
63 break;
67 return;
68 } else if (!IsLogicalScalarTrue(*mask)) {
69 // scalar MASK=.FALSE.: return identity value
70 return;
73 // No MASK=, or scalar MASK=.TRUE.
74 for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
75 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
76 break; // cut short, result is known
81 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
82 inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
83 const char *source, int line, int dim, const Descriptor *mask,
84 ACCUMULATOR &&accumulator, const char *intrinsic) {
85 Terminator terminator{source, line};
86 RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
87 using CppType = CppTypeFor<CAT, KIND>;
88 DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
89 if constexpr (std::is_void_v<CppType>) {
90 // Result is returned from accumulator, as in REDUCE() for derived type
91 #ifdef _MSC_VER // work around MSVC spurious error
92 accumulator.GetResult();
93 #else
94 accumulator.template GetResult<CppType>();
95 #endif
96 } else {
97 CppType result;
98 #ifdef _MSC_VER // work around MSVC spurious error
99 accumulator.GetResult(&result);
100 #else
101 accumulator.template GetResult<CppType>(&result);
102 #endif
103 return result;
107 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
108 // of the array is [2,3,5], the shape of the result is [2,5] and
109 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
110 // lower bounds other than one. This utility subroutine creates an
111 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
112 // elements of array(j,:,k) can be reduced.
113 inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[],
114 const Descriptor &descriptor, int zeroBasedDim,
115 const SubscriptValue from[]) {
116 descriptor.GetLowerBounds(at);
117 int rank{descriptor.rank()};
118 int j{0};
119 for (; j < zeroBasedDim; ++j) {
120 at[j] += from[j] - 1 /*lower bound*/;
122 for (++j; j < rank; ++j) {
123 at[j] += from[j - 1] - 1;
127 template <typename TYPE, typename ACCUMULATOR>
128 inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x,
129 int zeroBasedDim, SubscriptValue subscripts[], TYPE *result,
130 ACCUMULATOR &accumulator) {
131 SubscriptValue xAt[maxRank];
132 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
133 const auto &dim{x.GetDimension(zeroBasedDim)};
134 SubscriptValue at{dim.LowerBound()};
135 for (auto n{dim.Extent()}; n-- > 0; ++at) {
136 xAt[zeroBasedDim] = at;
137 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
138 break;
141 #ifdef _MSC_VER // work around MSVC spurious error
142 accumulator.GetResult(result, zeroBasedDim);
143 #else
144 accumulator.template GetResult<TYPE>(result, zeroBasedDim);
145 #endif
148 template <typename TYPE, typename ACCUMULATOR>
149 inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
150 int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask,
151 TYPE *result, ACCUMULATOR &accumulator) {
152 SubscriptValue xAt[maxRank], maskAt[maxRank];
153 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
154 GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
155 const auto &xDim{x.GetDimension(zeroBasedDim)};
156 SubscriptValue xPos{xDim.LowerBound()};
157 const auto &maskDim{mask.GetDimension(zeroBasedDim)};
158 SubscriptValue maskPos{maskDim.LowerBound()};
159 for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
160 ++xPos, ++maskPos) {
161 maskAt[zeroBasedDim] = maskPos;
162 if (IsLogicalElementTrue(mask, maskAt)) {
163 xAt[zeroBasedDim] = xPos;
164 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
165 break;
169 #ifdef _MSC_VER // work around MSVC spurious error
170 accumulator.GetResult(result, zeroBasedDim);
171 #else
172 accumulator.template GetResult<TYPE>(result, zeroBasedDim);
173 #endif
176 // Partial reductions with DIM=
178 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
179 inline RT_API_ATTRS void PartialReduction(Descriptor &result,
180 const Descriptor &x, std::size_t resultElementSize, int dim,
181 const Descriptor *mask, Terminator &terminator, const char *intrinsic,
182 ACCUMULATOR &accumulator) {
183 CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
184 intrinsic, TypeCode{CAT, KIND});
185 SubscriptValue at[maxRank];
186 result.GetLowerBounds(at);
187 INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
188 using CppType = CppTypeFor<CAT, KIND>;
189 if (mask) {
190 CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
191 if (mask->rank() > 0) {
192 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
193 accumulator.Reinitialize();
194 ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
195 x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
197 return;
198 } else if (!IsLogicalScalarTrue(*mask)) {
199 // scalar MASK=.FALSE.
200 accumulator.Reinitialize();
201 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
202 accumulator.GetResult(result.Element<CppType>(at));
204 return;
207 // No MASK= or scalar MASK=.TRUE.
208 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
209 accumulator.Reinitialize();
210 ReduceDimToScalar<CppType, ACCUMULATOR>(
211 x, dim - 1, at, result.Element<CppType>(at), accumulator);
215 template <template <typename> class ACCUM>
216 struct PartialIntegerReductionHelper {
217 template <int KIND> struct Functor {
218 static constexpr int Intermediate{
219 std::max(KIND, 4)}; // use at least "int" for intermediate results
220 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
221 int dim, const Descriptor *mask, Terminator &terminator,
222 const char *intrinsic) const {
223 using Accumulator =
224 ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
225 Accumulator accumulator{x};
226 // Element size of the destination descriptor is the same
227 // as the element size of the source.
228 PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
229 x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
234 template <template <typename> class INTEGER_ACCUM>
235 inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
236 const Descriptor &x, int dim, int kind, const Descriptor *mask,
237 const char *intrinsic, Terminator &terminator) {
238 ApplyIntegerKind<
239 PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
240 kind, terminator, result, x, dim, mask, terminator, intrinsic);
243 template <TypeCategory CAT, template <typename> class ACCUM, int MIN_KIND>
244 struct PartialFloatingReductionHelper {
245 template <int KIND> struct Functor {
246 static constexpr int Intermediate{std::max(KIND, MIN_KIND)};
247 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
248 int dim, const Descriptor *mask, Terminator &terminator,
249 const char *intrinsic) const {
250 using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
251 Accumulator accumulator{x};
252 // Element size of the destination descriptor is the same
253 // as the element size of the source.
254 PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
255 mask, terminator, intrinsic, accumulator);
260 template <template <typename> class INTEGER_ACCUM,
261 template <typename> class REAL_ACCUM,
262 template <typename> class COMPLEX_ACCUM, int MIN_REAL_KIND>
263 inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
264 const Descriptor &x, int dim, const char *source, int line,
265 const Descriptor *mask, const char *intrinsic) {
266 Terminator terminator{source, line};
267 auto catKind{x.type().GetCategoryAndKind()};
268 RUNTIME_CHECK(terminator, catKind.has_value());
269 switch (catKind->first) {
270 case TypeCategory::Integer:
271 PartialIntegerReduction<INTEGER_ACCUM>(
272 result, x, dim, catKind->second, mask, intrinsic, terminator);
273 break;
274 case TypeCategory::Real:
275 ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
276 REAL_ACCUM, MIN_REAL_KIND>::template Functor,
277 void>(catKind->second, terminator, result, x, dim, mask, terminator,
278 intrinsic);
279 break;
280 case TypeCategory::Complex:
281 ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
282 COMPLEX_ACCUM, MIN_REAL_KIND>::template Functor,
283 void>(catKind->second, terminator, result, x, dim, mask, terminator,
284 intrinsic);
285 break;
286 default:
287 terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
291 template <typename ACCUMULATOR> struct LocationResultHelper {
292 template <int KIND> struct Functor {
293 RT_API_ATTRS void operator()(
294 ACCUMULATOR &accumulator, const Descriptor &result) const {
295 accumulator.GetResult(
296 result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
301 template <typename ACCUMULATOR> struct PartialLocationHelper {
302 template <int KIND> struct Functor {
303 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
304 int dim, const Descriptor *mask, Terminator &terminator,
305 const char *intrinsic, ACCUMULATOR &accumulator) const {
306 // Element size of the destination descriptor is the size
307 // of {TypeCategory::Integer, KIND}.
308 PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
309 Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
310 terminator, intrinsic, accumulator);
315 // NORM2 templates
317 RT_VAR_GROUP_BEGIN
319 // Use at least double precision for accumulators.
320 // Don't use __float128, it doesn't work with abs() or sqrt() yet.
321 static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind{
322 #if HAS_LDBL128 || HAS_FLOAT128
324 #elif HAS_FLOAT80
326 #else
328 #endif
331 RT_VAR_GROUP_END
333 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
334 inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
335 int dim, const Descriptor *mask, const char *intrinsic,
336 Terminator &terminator) {
337 using Type = CppTypeFor<CAT, KIND>;
338 ACCUMULATOR accumulator{x};
339 if (dim == 0 || x.rank() == 1) {
340 // Total reduction
342 // Element size of the destination descriptor is the same
343 // as the element size of the source.
344 result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
345 CFI_attribute_allocatable);
346 if (int stat{result.Allocate()}) {
347 terminator.Crash(
348 "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
350 DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
351 accumulator.GetResult(result.OffsetElement<Type>());
352 } else {
353 // Partial reduction
355 // Element size of the destination descriptor is the same
356 // as the element size of the source.
357 PartialReduction<ACCUMULATOR, CAT, KIND>(result, x, x.ElementBytes(), dim,
358 mask, terminator, intrinsic, accumulator);
362 // The data type used by Norm2Accumulator.
363 template <int KIND>
364 using Norm2AccumType =
365 CppTypeFor<TypeCategory::Real, std::clamp(KIND, 8, Norm2LargestLDKind)>;
367 template <int KIND> class Norm2Accumulator {
368 public:
369 using Type = CppTypeFor<TypeCategory::Real, KIND>;
370 using AccumType = Norm2AccumType<KIND>;
371 explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array)
372 : array_{array} {}
373 RT_API_ATTRS void Reinitialize() { max_ = sum_ = 0; }
374 template <typename A>
375 RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
376 // m * sqrt(1 + sum((others(:)/m)**2))
377 *p = static_cast<Type>(max_ * SQRTTy<AccumType>::compute(1 + sum_));
379 RT_API_ATTRS bool Accumulate(Type x) {
380 auto absX{ABSTy<AccumType>::compute(static_cast<AccumType>(x))};
381 if (!max_) {
382 max_ = absX;
383 } else if (absX > max_) {
384 auto t{max_ / absX}; // < 1.0
385 auto tsq{t * t};
386 sum_ *= tsq; // scale sum to reflect change to the max
387 sum_ += tsq; // include a term for the previous max
388 max_ = absX;
389 } else { // absX <= max_
390 auto t{absX / max_};
391 sum_ += t * t;
393 return true;
395 template <typename A>
396 RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
397 return Accumulate(*array_.Element<A>(at));
400 private:
401 const Descriptor &array_;
402 AccumType max_{0}; // value (m) with largest magnitude
403 AccumType sum_{0}; // sum((others(:)/m)**2)
406 template <int KIND> struct Norm2Helper {
407 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim,
408 const Descriptor *mask, Terminator &terminator) const {
409 DoMaxMinNorm2<TypeCategory::Real, KIND, Norm2Accumulator<KIND>>(
410 result, x, dim, mask, "NORM2", terminator);
414 } // namespace Fortran::runtime
415 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_