flang/runtime/reduction-templates.h

   1 //===-- runtime/reduction-templates.h -------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 // Generic function templates used by various reduction transformation
  10 // intrinsic functions (SUM, PRODUCT, &c.)
  11 //
  12 // * Partial reductions (i.e., those with DIM= arguments that are not
  13 //   required to be 1 by the rank of the argument) return arrays that
  14 //   are dynamically allocated in a caller-supplied descriptor.
  15 // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
  16 //   return integer vectors of some kind, not scalars; a caller-supplied
  17 //   descriptor is used
  18 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
  19 //   length results, dynamically allocated in a caller-supplied descriptor
  20
  21 #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
  22 #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
  23
  24 #include "terminator.h"
  25 #include "tools.h"
  26 #include "flang/Runtime/cpp-type.h"
  27 #include "flang/Runtime/descriptor.h"
  28
  29 namespace Fortran::runtime {
  30
  31 // Reductions are implemented with *accumulators*, which are instances of
  32 // classes that incrementally build up the result (or an element thereof) during
  33 // a traversal of the unmasked elements of an array.  Each accumulator class
  34 // supports a constructor (which captures a reference to the array), an
  35 // AccumulateAt() member function that applies supplied subscripts to the
  36 // array and does something with a scalar element, and a GetResult()
  37 // member function that copies a final result into its destination.
  38
  39 // Total reduction of the array argument to a scalar (or to a vector in the
  40 // cases of FINDLOC, MAXLOC, & MINLOC).  These are the cases without DIM= or
  41 // cases where the argument has rank 1 and DIM=, if present, must be 1.
  42 template <typename TYPE, typename ACCUMULATOR>
  43 inline void DoTotalReduction(const Descriptor &x, int dim,
  44     const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
  45     Terminator &terminator) {
  46   if (dim < 0 || dim > 1) {
  47     terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
  48         intrinsic, dim, x.rank());
  49   }
  50   SubscriptValue xAt[maxRank];
  51   x.GetLowerBounds(xAt);
  52   if (mask) {
  53     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
  54     SubscriptValue maskAt[maxRank];
  55     mask->GetLowerBounds(maskAt);
  56     if (mask->rank() > 0) {
  57       for (auto elements{x.Elements()}; elements--;
  58            x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
  59         if (IsLogicalElementTrue(*mask, maskAt)) {
  60           if (!accumulator.template AccumulateAt<TYPE>(xAt))
  61             break;
  62         }
  63       }
  64       return;
  65     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
  66       // scalar MASK=.FALSE.: return identity value
  67       return;
  68     }
  69   }
  70   // No MASK=, or scalar MASK=.TRUE.
  71   for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
  72     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
  73       break; // cut short, result is known
  74     }
  75   }
  76 }
  77
  78 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
  79 inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
  80     const char *source, int line, int dim, const Descriptor *mask,
  81     ACCUMULATOR &&accumulator, const char *intrinsic) {
  82   Terminator terminator{source, line};
  83   RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
  84   using CppType = CppTypeFor<CAT, KIND>;
  85   DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
  86   CppType result;
  87 #ifdef _MSC_VER // work around MSVC spurious error
  88   accumulator.GetResult(&result);
  89 #else
  90   accumulator.template GetResult(&result);
  91 #endif
  92   return result;
  93 }
  94
  95 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
  96 // of the array is [2,3,5], the shape of the result is [2,5] and
  97 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
  98 // lower bounds other than one.  This utility subroutine creates an
  99 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
 100 // elements of array(j,:,k) can be reduced.
 101 inline void GetExpandedSubscripts(SubscriptValue at[],
 102     const Descriptor &descriptor, int zeroBasedDim,
 103     const SubscriptValue from[]) {
 104   descriptor.GetLowerBounds(at);
 105   int rank{descriptor.rank()};
 106   int j{0};
 107   for (; j < zeroBasedDim; ++j) {
 108     at[j] += from[j] - 1 /*lower bound*/;
 109   }
 110   for (++j; j < rank; ++j) {
 111     at[j] += from[j - 1] - 1;
 112   }
 113 }
 114
 115 template <typename TYPE, typename ACCUMULATOR>
 116 inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
 117     SubscriptValue subscripts[], TYPE *result, ACCUMULATOR &accumulator) {
 118   SubscriptValue xAt[maxRank];
 119   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
 120   const auto &dim{x.GetDimension(zeroBasedDim)};
 121   SubscriptValue at{dim.LowerBound()};
 122   for (auto n{dim.Extent()}; n-- > 0; ++at) {
 123     xAt[zeroBasedDim] = at;
 124     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
 125       break;
 126     }
 127   }
 128 #ifdef _MSC_VER // work around MSVC spurious error
 129   accumulator.GetResult(result, zeroBasedDim);
 130 #else
 131   accumulator.template GetResult(result, zeroBasedDim);
 132 #endif
 133 }
 134
 135 template <typename TYPE, typename ACCUMULATOR>
 136 inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
 137     SubscriptValue subscripts[], const Descriptor &mask, TYPE *result,
 138     ACCUMULATOR &accumulator) {
 139   SubscriptValue xAt[maxRank], maskAt[maxRank];
 140   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
 141   GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
 142   const auto &xDim{x.GetDimension(zeroBasedDim)};
 143   SubscriptValue xPos{xDim.LowerBound()};
 144   const auto &maskDim{mask.GetDimension(zeroBasedDim)};
 145   SubscriptValue maskPos{maskDim.LowerBound()};
 146   for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
 147        ++xPos, ++maskPos) {
 148     maskAt[zeroBasedDim] = maskPos;
 149     if (IsLogicalElementTrue(mask, maskAt)) {
 150       xAt[zeroBasedDim] = xPos;
 151       if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
 152         break;
 153       }
 154     }
 155   }
 156 #ifdef _MSC_VER // work around MSVC spurious error
 157   accumulator.GetResult(result, zeroBasedDim);
 158 #else
 159   accumulator.template GetResult(result, zeroBasedDim);
 160 #endif
 161 }
 162
 163 // Utility: establishes & allocates the result array for a partial
 164 // reduction (i.e., one with DIM=).
 165 static void CreatePartialReductionResult(Descriptor &result,
 166     const Descriptor &x, std::size_t resultElementSize, int dim,
 167     Terminator &terminator, const char *intrinsic, TypeCode typeCode) {
 168   int xRank{x.rank()};
 169   if (dim < 1 || dim > xRank) {
 170     terminator.Crash(
 171         "%s: bad DIM=%d for ARRAY with rank %d", intrinsic, dim, xRank);
 172   }
 173   int zeroBasedDim{dim - 1};
 174   SubscriptValue resultExtent[maxRank];
 175   for (int j{0}; j < zeroBasedDim; ++j) {
 176     resultExtent[j] = x.GetDimension(j).Extent();
 177   }
 178   for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
 179     resultExtent[j - 1] = x.GetDimension(j).Extent();
 180   }
 181   result.Establish(typeCode, resultElementSize, nullptr, xRank - 1,
 182       resultExtent, CFI_attribute_allocatable);
 183   for (int j{0}; j + 1 < xRank; ++j) {
 184     result.GetDimension(j).SetBounds(1, resultExtent[j]);
 185   }
 186   if (int stat{result.Allocate()}) {
 187     terminator.Crash(
 188         "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
 189   }
 190 }
 191
 192 // Partial reductions with DIM=
 193
 194 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
 195 inline void PartialReduction(Descriptor &result, const Descriptor &x,
 196     std::size_t resultElementSize, int dim, const Descriptor *mask,
 197     Terminator &terminator, const char *intrinsic, ACCUMULATOR &accumulator) {
 198   CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
 199       intrinsic, TypeCode{CAT, KIND});
 200   SubscriptValue at[maxRank];
 201   result.GetLowerBounds(at);
 202   INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
 203   using CppType = CppTypeFor<CAT, KIND>;
 204   if (mask) {
 205     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
 206     SubscriptValue maskAt[maxRank]; // contents unused
 207     if (mask->rank() > 0) {
 208       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
 209         accumulator.Reinitialize();
 210         ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
 211             x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
 212       }
 213       return;
 214     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
 215       // scalar MASK=.FALSE.
 216       accumulator.Reinitialize();
 217       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
 218         accumulator.GetResult(result.Element<CppType>(at));
 219       }
 220       return;
 221     }
 222   }
 223   // No MASK= or scalar MASK=.TRUE.
 224   for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
 225     accumulator.Reinitialize();
 226     ReduceDimToScalar<CppType, ACCUMULATOR>(
 227         x, dim - 1, at, result.Element<CppType>(at), accumulator);
 228   }
 229 }
 230
 231 template <template <typename> class ACCUM>
 232 struct PartialIntegerReductionHelper {
 233   template <int KIND> struct Functor {
 234     static constexpr int Intermediate{
 235         std::max(KIND, 4)}; // use at least "int" for intermediate results
 236     void operator()(Descriptor &result, const Descriptor &x, int dim,
 237         const Descriptor *mask, Terminator &terminator,
 238         const char *intrinsic) const {
 239       using Accumulator =
 240           ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
 241       Accumulator accumulator{x};
 242       // Element size of the destination descriptor is the same
 243       // as the element size of the source.
 244       PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
 245           x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
 246     }
 247   };
 248 };
 249
 250 template <template <typename> class INTEGER_ACCUM>
 251 inline void PartialIntegerReduction(Descriptor &result, const Descriptor &x,
 252     int dim, int kind, const Descriptor *mask, const char *intrinsic,
 253     Terminator &terminator) {
 254   ApplyIntegerKind<
 255       PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
 256       kind, terminator, result, x, dim, mask, terminator, intrinsic);
 257 }
 258
 259 template <TypeCategory CAT, template <typename> class ACCUM>
 260 struct PartialFloatingReductionHelper {
 261   template <int KIND> struct Functor {
 262     static constexpr int Intermediate{
 263         std::max(KIND, 8)}; // use at least "double" for intermediate results
 264     void operator()(Descriptor &result, const Descriptor &x, int dim,
 265         const Descriptor *mask, Terminator &terminator,
 266         const char *intrinsic) const {
 267       using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
 268       Accumulator accumulator{x};
 269       // Element size of the destination descriptor is the same
 270       // as the element size of the source.
 271       PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
 272           mask, terminator, intrinsic, accumulator);
 273     }
 274   };
 275 };
 276
 277 template <template <typename> class INTEGER_ACCUM,
 278     template <typename> class REAL_ACCUM,
 279     template <typename> class COMPLEX_ACCUM>
 280 inline void TypedPartialNumericReduction(Descriptor &result,
 281     const Descriptor &x, int dim, const char *source, int line,
 282     const Descriptor *mask, const char *intrinsic) {
 283   Terminator terminator{source, line};
 284   auto catKind{x.type().GetCategoryAndKind()};
 285   RUNTIME_CHECK(terminator, catKind.has_value());
 286   switch (catKind->first) {
 287   case TypeCategory::Integer:
 288     PartialIntegerReduction<INTEGER_ACCUM>(
 289         result, x, dim, catKind->second, mask, intrinsic, terminator);
 290     break;
 291   case TypeCategory::Real:
 292     ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
 293                                REAL_ACCUM>::template Functor,
 294         void>(catKind->second, terminator, result, x, dim, mask, terminator,
 295         intrinsic);
 296     break;
 297   case TypeCategory::Complex:
 298     ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
 299                                COMPLEX_ACCUM>::template Functor,
 300         void>(catKind->second, terminator, result, x, dim, mask, terminator,
 301         intrinsic);
 302     break;
 303   default:
 304     terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
 305   }
 306 }
 307
 308 template <typename ACCUMULATOR> struct LocationResultHelper {
 309   template <int KIND> struct Functor {
 310     void operator()(ACCUMULATOR &accumulator, const Descriptor &result) const {
 311       accumulator.GetResult(
 312           result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
 313     }
 314   };
 315 };
 316
 317 template <typename ACCUMULATOR> struct PartialLocationHelper {
 318   template <int KIND> struct Functor {
 319     void operator()(Descriptor &result, const Descriptor &x, int dim,
 320         const Descriptor *mask, Terminator &terminator, const char *intrinsic,
 321         ACCUMULATOR &accumulator) const {
 322       // Element size of the destination descriptor is the size
 323       // of {TypeCategory::Integer, KIND}.
 324       PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
 325           Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
 326           terminator, intrinsic, accumulator);
 327     }
 328   };
 329 };
 330
 331 } // namespace Fortran::runtime
 332 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_