flang/runtime/reduction-templates.h

   1 //===-- runtime/reduction-templates.h ---------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 // Generic function templates used by various reduction transformation
  10 // intrinsic functions (SUM, PRODUCT, &c.)
  11 //
  12 // * Partial reductions (i.e., those with DIM= arguments that are not
  13 //   required to be 1 by the rank of the argument) return arrays that
  14 //   are dynamically allocated in a caller-supplied descriptor.
  15 // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
  16 //   return integer vectors of some kind, not scalars; a caller-supplied
  17 //   descriptor is used
  18 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
  19 //   length results, dynamically allocated in a caller-supplied descriptor
  20
  21 #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
  22 #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
  23
  24 #include "numeric-templates.h"
  25 #include "terminator.h"
  26 #include "tools.h"
  27 #include "flang/Runtime/cpp-type.h"
  28 #include "flang/Runtime/descriptor.h"
  29 #include <algorithm>
  30
  31 namespace Fortran::runtime {
  32
  33 // Reductions are implemented with *accumulators*, which are instances of
  34 // classes that incrementally build up the result (or an element thereof) during
  35 // a traversal of the unmasked elements of an array.  Each accumulator class
  36 // supports a constructor (which captures a reference to the array), an
  37 // AccumulateAt() member function that applies supplied subscripts to the
  38 // array and does something with a scalar element, and a GetResult()
  39 // member function that copies a final result into its destination.
  40
  41 // Total reduction of the array argument to a scalar (or to a vector in the
  42 // cases of FINDLOC, MAXLOC, & MINLOC).  These are the cases without DIM= or
  43 // cases where the argument has rank 1 and DIM=, if present, must be 1.
  44 template <typename TYPE, typename ACCUMULATOR>
  45 inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim,
  46     const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
  47     Terminator &terminator) {
  48   if (dim < 0 || dim > 1) {
  49     terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
  50         intrinsic, dim, x.rank());
  51   }
  52   SubscriptValue xAt[maxRank];
  53   x.GetLowerBounds(xAt);
  54   if (mask) {
  55     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
  56     if (mask->rank() > 0) {
  57       SubscriptValue maskAt[maxRank];
  58       mask->GetLowerBounds(maskAt);
  59       for (auto elements{x.Elements()}; elements--;
  60            x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
  61         if (IsLogicalElementTrue(*mask, maskAt)) {
  62           if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
  63             break;
  64           }
  65         }
  66       }
  67       return;
  68     } else if (!IsLogicalScalarTrue(*mask)) {
  69       // scalar MASK=.FALSE.: return identity value
  70       return;
  71     }
  72   }
  73   // No MASK=, or scalar MASK=.TRUE.
  74   for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
  75     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
  76       break; // cut short, result is known
  77     }
  78   }
  79 }
  80
  81 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
  82 inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
  83     const char *source, int line, int dim, const Descriptor *mask,
  84     ACCUMULATOR &&accumulator, const char *intrinsic) {
  85   Terminator terminator{source, line};
  86   RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
  87   using CppType = CppTypeFor<CAT, KIND>;
  88   DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
  89   if constexpr (std::is_void_v<CppType>) {
  90     // Result is returned from accumulator, as in REDUCE() for derived type
  91 #ifdef _MSC_VER // work around MSVC spurious error
  92     accumulator.GetResult();
  93 #else
  94     accumulator.template GetResult<CppType>();
  95 #endif
  96   } else {
  97     CppType result;
  98 #ifdef _MSC_VER // work around MSVC spurious error
  99     accumulator.GetResult(&result);
 100 #else
 101     accumulator.template GetResult<CppType>(&result);
 102 #endif
 103     return result;
 104   }
 105 }
 106
 107 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
 108 // of the array is [2,3,5], the shape of the result is [2,5] and
 109 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
 110 // lower bounds other than one.  This utility subroutine creates an
 111 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
 112 // elements of array(j,:,k) can be reduced.
 113 inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[],
 114     const Descriptor &descriptor, int zeroBasedDim,
 115     const SubscriptValue from[]) {
 116   descriptor.GetLowerBounds(at);
 117   int rank{descriptor.rank()};
 118   int j{0};
 119   for (; j < zeroBasedDim; ++j) {
 120     at[j] += from[j] - 1 /*lower bound*/;
 121   }
 122   for (++j; j < rank; ++j) {
 123     at[j] += from[j - 1] - 1;
 124   }
 125 }
 126
 127 template <typename TYPE, typename ACCUMULATOR>
 128 inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x,
 129     int zeroBasedDim, SubscriptValue subscripts[], TYPE *result,
 130     ACCUMULATOR &accumulator) {
 131   SubscriptValue xAt[maxRank];
 132   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
 133   const auto &dim{x.GetDimension(zeroBasedDim)};
 134   SubscriptValue at{dim.LowerBound()};
 135   for (auto n{dim.Extent()}; n-- > 0; ++at) {
 136     xAt[zeroBasedDim] = at;
 137     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
 138       break;
 139     }
 140   }
 141 #ifdef _MSC_VER // work around MSVC spurious error
 142   accumulator.GetResult(result, zeroBasedDim);
 143 #else
 144   accumulator.template GetResult<TYPE>(result, zeroBasedDim);
 145 #endif
 146 }
 147
 148 template <typename TYPE, typename ACCUMULATOR>
 149 inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
 150     int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask,
 151     TYPE *result, ACCUMULATOR &accumulator) {
 152   SubscriptValue xAt[maxRank], maskAt[maxRank];
 153   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
 154   GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
 155   const auto &xDim{x.GetDimension(zeroBasedDim)};
 156   SubscriptValue xPos{xDim.LowerBound()};
 157   const auto &maskDim{mask.GetDimension(zeroBasedDim)};
 158   SubscriptValue maskPos{maskDim.LowerBound()};
 159   for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
 160        ++xPos, ++maskPos) {
 161     maskAt[zeroBasedDim] = maskPos;
 162     if (IsLogicalElementTrue(mask, maskAt)) {
 163       xAt[zeroBasedDim] = xPos;
 164       if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
 165         break;
 166       }
 167     }
 168   }
 169 #ifdef _MSC_VER // work around MSVC spurious error
 170   accumulator.GetResult(result, zeroBasedDim);
 171 #else
 172   accumulator.template GetResult<TYPE>(result, zeroBasedDim);
 173 #endif
 174 }
 175
 176 // Partial reductions with DIM=
 177
 178 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
 179 inline RT_API_ATTRS void PartialReduction(Descriptor &result,
 180     const Descriptor &x, std::size_t resultElementSize, int dim,
 181     const Descriptor *mask, Terminator &terminator, const char *intrinsic,
 182     ACCUMULATOR &accumulator) {
 183   CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
 184       intrinsic, TypeCode{CAT, KIND});
 185   SubscriptValue at[maxRank];
 186   result.GetLowerBounds(at);
 187   INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
 188   using CppType = CppTypeFor<CAT, KIND>;
 189   if (mask) {
 190     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
 191     if (mask->rank() > 0) {
 192       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
 193         accumulator.Reinitialize();
 194         ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
 195             x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
 196       }
 197       return;
 198     } else if (!IsLogicalScalarTrue(*mask)) {
 199       // scalar MASK=.FALSE.
 200       accumulator.Reinitialize();
 201       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
 202         accumulator.GetResult(result.Element<CppType>(at));
 203       }
 204       return;
 205     }
 206   }
 207   // No MASK= or scalar MASK=.TRUE.
 208   for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
 209     accumulator.Reinitialize();
 210     ReduceDimToScalar<CppType, ACCUMULATOR>(
 211         x, dim - 1, at, result.Element<CppType>(at), accumulator);
 212   }
 213 }
 214
 215 template <template <typename> class ACCUM>
 216 struct PartialIntegerReductionHelper {
 217   template <int KIND> struct Functor {
 218     static constexpr int Intermediate{
 219         std::max(KIND, 4)}; // use at least "int" for intermediate results
 220     RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
 221         int dim, const Descriptor *mask, Terminator &terminator,
 222         const char *intrinsic) const {
 223       using Accumulator =
 224           ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
 225       Accumulator accumulator{x};
 226       // Element size of the destination descriptor is the same
 227       // as the element size of the source.
 228       PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
 229           x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
 230     }
 231   };
 232 };
 233
 234 template <template <typename> class INTEGER_ACCUM>
 235 inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
 236     const Descriptor &x, int dim, int kind, const Descriptor *mask,
 237     const char *intrinsic, Terminator &terminator) {
 238   ApplyIntegerKind<
 239       PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
 240       kind, terminator, result, x, dim, mask, terminator, intrinsic);
 241 }
 242
 243 template <TypeCategory CAT, template <typename> class ACCUM, int MIN_KIND>
 244 struct PartialFloatingReductionHelper {
 245   template <int KIND> struct Functor {
 246     static constexpr int Intermediate{std::max(KIND, MIN_KIND)};
 247     RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
 248         int dim, const Descriptor *mask, Terminator &terminator,
 249         const char *intrinsic) const {
 250       using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
 251       Accumulator accumulator{x};
 252       // Element size of the destination descriptor is the same
 253       // as the element size of the source.
 254       PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
 255           mask, terminator, intrinsic, accumulator);
 256     }
 257   };
 258 };
 259
 260 template <template <typename> class INTEGER_ACCUM,
 261     template <typename> class REAL_ACCUM,
 262     template <typename> class COMPLEX_ACCUM, int MIN_REAL_KIND>
 263 inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
 264     const Descriptor &x, int dim, const char *source, int line,
 265     const Descriptor *mask, const char *intrinsic) {
 266   Terminator terminator{source, line};
 267   auto catKind{x.type().GetCategoryAndKind()};
 268   RUNTIME_CHECK(terminator, catKind.has_value());
 269   switch (catKind->first) {
 270   case TypeCategory::Integer:
 271     PartialIntegerReduction<INTEGER_ACCUM>(
 272         result, x, dim, catKind->second, mask, intrinsic, terminator);
 273     break;
 274   case TypeCategory::Real:
 275     ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
 276                                REAL_ACCUM, MIN_REAL_KIND>::template Functor,
 277         void>(catKind->second, terminator, result, x, dim, mask, terminator,
 278         intrinsic);
 279     break;
 280   case TypeCategory::Complex:
 281     ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
 282                                COMPLEX_ACCUM, MIN_REAL_KIND>::template Functor,
 283         void>(catKind->second, terminator, result, x, dim, mask, terminator,
 284         intrinsic);
 285     break;
 286   default:
 287     terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
 288   }
 289 }
 290
 291 template <typename ACCUMULATOR> struct LocationResultHelper {
 292   template <int KIND> struct Functor {
 293     RT_API_ATTRS void operator()(
 294         ACCUMULATOR &accumulator, const Descriptor &result) const {
 295       accumulator.GetResult(
 296           result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
 297     }
 298   };
 299 };
 300
 301 template <typename ACCUMULATOR> struct PartialLocationHelper {
 302   template <int KIND> struct Functor {
 303     RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
 304         int dim, const Descriptor *mask, Terminator &terminator,
 305         const char *intrinsic, ACCUMULATOR &accumulator) const {
 306       // Element size of the destination descriptor is the size
 307       // of {TypeCategory::Integer, KIND}.
 308       PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
 309           Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
 310           terminator, intrinsic, accumulator);
 311     }
 312   };
 313 };
 314
 315 // NORM2 templates
 316
 317 RT_VAR_GROUP_BEGIN
 318
 319 // Use at least double precision for accumulators.
 320 // Don't use __float128, it doesn't work with abs() or sqrt() yet.
 321 static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind{
 322 #if HAS_LDBL128 || HAS_FLOAT128
 323     16
 324 #elif HAS_FLOAT80
 325     10
 326 #else
 327     8
 328 #endif
 329 };
 330
 331 RT_VAR_GROUP_END
 332
 333 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
 334 inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
 335     int dim, const Descriptor *mask, const char *intrinsic,
 336     Terminator &terminator) {
 337   using Type = CppTypeFor<CAT, KIND>;
 338   ACCUMULATOR accumulator{x};
 339   if (dim == 0 || x.rank() == 1) {
 340     // Total reduction
 341
 342     // Element size of the destination descriptor is the same
 343     // as the element size of the source.
 344     result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
 345         CFI_attribute_allocatable);
 346     if (int stat{result.Allocate()}) {
 347       terminator.Crash(
 348           "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
 349     }
 350     DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
 351     accumulator.GetResult(result.OffsetElement<Type>());
 352   } else {
 353     // Partial reduction
 354
 355     // Element size of the destination descriptor is the same
 356     // as the element size of the source.
 357     PartialReduction<ACCUMULATOR, CAT, KIND>(result, x, x.ElementBytes(), dim,
 358         mask, terminator, intrinsic, accumulator);
 359   }
 360 }
 361
 362 // The data type used by Norm2Accumulator.
 363 template <int KIND>
 364 using Norm2AccumType =
 365     CppTypeFor<TypeCategory::Real, std::clamp(KIND, 8, Norm2LargestLDKind)>;
 366
 367 template <int KIND> class Norm2Accumulator {
 368 public:
 369   using Type = CppTypeFor<TypeCategory::Real, KIND>;
 370   using AccumType = Norm2AccumType<KIND>;
 371   explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array)
 372       : array_{array} {}
 373   RT_API_ATTRS void Reinitialize() { max_ = sum_ = 0; }
 374   template <typename A>
 375   RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
 376     // m * sqrt(1 + sum((others(:)/m)**2))
 377     *p = static_cast<Type>(max_ * SQRTTy<AccumType>::compute(1 + sum_));
 378   }
 379   RT_API_ATTRS bool Accumulate(Type x) {
 380     auto absX{ABSTy<AccumType>::compute(static_cast<AccumType>(x))};
 381     if (!max_) {
 382       max_ = absX;
 383     } else if (absX > max_) {
 384       auto t{max_ / absX}; // < 1.0
 385       auto tsq{t * t};
 386       sum_ *= tsq; // scale sum to reflect change to the max
 387       sum_ += tsq; // include a term for the previous max
 388       max_ = absX;
 389     } else { // absX <= max_
 390       auto t{absX / max_};
 391       sum_ += t * t;
 392     }
 393     return true;
 394   }
 395   template <typename A>
 396   RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
 397     return Accumulate(*array_.Element<A>(at));
 398   }
 399
 400 private:
 401   const Descriptor &array_;
 402   AccumType max_{0}; // value (m) with largest magnitude
 403   AccumType sum_{0}; // sum((others(:)/m)**2)
 404 };
 405
 406 template <int KIND> struct Norm2Helper {
 407   RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim,
 408       const Descriptor *mask, Terminator &terminator) const {
 409     DoMaxMinNorm2<TypeCategory::Real, KIND, Norm2Accumulator<KIND>>(
 410         result, x, dim, mask, "NORM2", terminator);
 411   }
 412 };
 413
 414 } // namespace Fortran::runtime
 415 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_