[flang][cuda] Adapt ExternalNameConversion to work in gpu module (#117039)
[llvm-project.git] / flang / runtime / reduction.cpp
blob074a270cb508382fd5137dbfce6e60c0316cceb5
1 //===-- runtime/reduction.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 // Implements ALL, ANY, COUNT, IALL, IANY, IPARITY, & PARITY for all required
10 // operand types and shapes.
12 // DOT_PRODUCT, FINDLOC, MATMUL, SUM, and PRODUCT are in their own eponymous
13 // source files.
14 // NORM2, MAXLOC, MINLOC, MAXVAL, and MINVAL are in extrema.cpp.
16 #include "flang/Runtime/reduction.h"
17 #include "reduction-templates.h"
18 #include "flang/Runtime/descriptor.h"
19 #include <cinttypes>
21 namespace Fortran::runtime {
23 // IALL, IANY, IPARITY
25 template <typename INTERMEDIATE> class IntegerAndAccumulator {
26 public:
27 explicit RT_API_ATTRS IntegerAndAccumulator(const Descriptor &array)
28 : array_{array} {}
29 RT_API_ATTRS void Reinitialize() { and_ = ~INTERMEDIATE{0}; }
30 template <typename A>
31 RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
32 *p = static_cast<A>(and_);
34 template <typename A>
35 RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
36 and_ &= *array_.Element<A>(at);
37 return true;
40 private:
41 const Descriptor &array_;
42 INTERMEDIATE and_{~INTERMEDIATE{0}};
45 template <typename INTERMEDIATE> class IntegerOrAccumulator {
46 public:
47 explicit RT_API_ATTRS IntegerOrAccumulator(const Descriptor &array)
48 : array_{array} {}
49 RT_API_ATTRS void Reinitialize() { or_ = 0; }
50 template <typename A>
51 RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
52 *p = static_cast<A>(or_);
54 template <typename A>
55 RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
56 or_ |= *array_.Element<A>(at);
57 return true;
60 private:
61 const Descriptor &array_;
62 INTERMEDIATE or_{0};
65 template <typename INTERMEDIATE> class IntegerXorAccumulator {
66 public:
67 explicit RT_API_ATTRS IntegerXorAccumulator(const Descriptor &array)
68 : array_{array} {}
69 RT_API_ATTRS void Reinitialize() { xor_ = 0; }
70 template <typename A>
71 RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
72 *p = static_cast<A>(xor_);
74 template <typename A>
75 RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
76 xor_ ^= *array_.Element<A>(at);
77 return true;
80 private:
81 const Descriptor &array_;
82 INTERMEDIATE xor_{0};
85 extern "C" {
86 CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAll1)(const Descriptor &x,
87 const char *source, int line, int dim, const Descriptor *mask) {
88 return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
89 IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL");
91 CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAll2)(const Descriptor &x,
92 const char *source, int line, int dim, const Descriptor *mask) {
93 return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
94 IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL");
96 CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAll4)(const Descriptor &x,
97 const char *source, int line, int dim, const Descriptor *mask) {
98 return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
99 IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL");
101 CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAll8)(const Descriptor &x,
102 const char *source, int line, int dim, const Descriptor *mask) {
103 return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
104 IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IALL");
106 #ifdef __SIZEOF_INT128__
107 CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAll16)(const Descriptor &x,
108 const char *source, int line, int dim, const Descriptor *mask) {
109 return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
110 mask, IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
111 "IALL");
113 #endif
114 void RTDEF(IAllDim)(Descriptor &result, const Descriptor &x, int dim,
115 const char *source, int line, const Descriptor *mask) {
116 Terminator terminator{source, line};
117 auto catKind{x.type().GetCategoryAndKind()};
118 RUNTIME_CHECK(terminator,
119 catKind.has_value() && catKind->first == TypeCategory::Integer);
120 PartialIntegerReduction<IntegerAndAccumulator>(
121 result, x, dim, catKind->second, mask, "IALL", terminator);
124 CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAny1)(const Descriptor &x,
125 const char *source, int line, int dim, const Descriptor *mask) {
126 return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
127 IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY");
129 CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAny2)(const Descriptor &x,
130 const char *source, int line, int dim, const Descriptor *mask) {
131 return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
132 IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY");
134 CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAny4)(const Descriptor &x,
135 const char *source, int line, int dim, const Descriptor *mask) {
136 return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
137 IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY");
139 CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAny8)(const Descriptor &x,
140 const char *source, int line, int dim, const Descriptor *mask) {
141 return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
142 IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IANY");
144 #ifdef __SIZEOF_INT128__
145 CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAny16)(const Descriptor &x,
146 const char *source, int line, int dim, const Descriptor *mask) {
147 return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
148 mask, IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
149 "IANY");
151 #endif
152 void RTDEF(IAnyDim)(Descriptor &result, const Descriptor &x, int dim,
153 const char *source, int line, const Descriptor *mask) {
154 Terminator terminator{source, line};
155 auto catKind{x.type().GetCategoryAndKind()};
156 RUNTIME_CHECK(terminator,
157 catKind.has_value() && catKind->first == TypeCategory::Integer);
158 PartialIntegerReduction<IntegerOrAccumulator>(
159 result, x, dim, catKind->second, mask, "IANY", terminator);
162 CppTypeFor<TypeCategory::Integer, 1> RTDEF(IParity1)(const Descriptor &x,
163 const char *source, int line, int dim, const Descriptor *mask) {
164 return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
165 IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
166 "IPARITY");
168 CppTypeFor<TypeCategory::Integer, 2> RTDEF(IParity2)(const Descriptor &x,
169 const char *source, int line, int dim, const Descriptor *mask) {
170 return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
171 IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
172 "IPARITY");
174 CppTypeFor<TypeCategory::Integer, 4> RTDEF(IParity4)(const Descriptor &x,
175 const char *source, int line, int dim, const Descriptor *mask) {
176 return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
177 IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
178 "IPARITY");
180 CppTypeFor<TypeCategory::Integer, 8> RTDEF(IParity8)(const Descriptor &x,
181 const char *source, int line, int dim, const Descriptor *mask) {
182 return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
183 IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x},
184 "IPARITY");
186 #ifdef __SIZEOF_INT128__
187 CppTypeFor<TypeCategory::Integer, 16> RTDEF(IParity16)(const Descriptor &x,
188 const char *source, int line, int dim, const Descriptor *mask) {
189 return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
190 mask, IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
191 "IPARITY");
193 #endif
194 void RTDEF(IParityDim)(Descriptor &result, const Descriptor &x, int dim,
195 const char *source, int line, const Descriptor *mask) {
196 Terminator terminator{source, line};
197 auto catKind{x.type().GetCategoryAndKind()};
198 RUNTIME_CHECK(terminator,
199 catKind.has_value() && catKind->first == TypeCategory::Integer);
200 PartialIntegerReduction<IntegerXorAccumulator>(
201 result, x, dim, catKind->second, mask, "IPARITY", terminator);
205 // ALL, ANY, COUNT, & PARITY
// Selects which logical reduction a LogicalAccumulator performs.
enum class LogicalReduction {
  All,
  Any,
  Parity,
};
209 template <LogicalReduction REDUCTION> class LogicalAccumulator {
210 public:
211 using Type = bool;
212 explicit LogicalAccumulator(const Descriptor &array) : array_{array} {}
213 void Reinitialize() { result_ = REDUCTION == LogicalReduction::All; }
214 bool Result() const { return result_; }
215 bool Accumulate(bool x) {
216 if constexpr (REDUCTION == LogicalReduction::Parity) {
217 result_ = result_ != x;
218 } else if (x != (REDUCTION == LogicalReduction::All)) {
219 result_ = x;
220 return false;
222 return true;
224 template <typename IGNORED = void>
225 bool AccumulateAt(const SubscriptValue at[]) {
226 return Accumulate(IsLogicalElementTrue(array_, at));
229 private:
230 const Descriptor &array_;
231 bool result_{REDUCTION == LogicalReduction::All};
234 template <typename ACCUMULATOR>
235 inline auto GetTotalLogicalReduction(const Descriptor &x, const char *source,
236 int line, int dim, ACCUMULATOR &&accumulator, const char *intrinsic) ->
237 typename ACCUMULATOR::Type {
238 Terminator terminator{source, line};
239 if (dim < 0 || dim > 1) {
240 terminator.Crash("%s: bad DIM=%d for ARRAY with rank=1", intrinsic, dim);
242 SubscriptValue xAt[maxRank];
243 x.GetLowerBounds(xAt);
244 for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
245 if (!accumulator.AccumulateAt(xAt)) {
246 break; // cut short, result is known
249 return accumulator.Result();
252 template <typename ACCUMULATOR>
253 inline auto ReduceLogicalDimToScalar(const Descriptor &x, int zeroBasedDim,
254 SubscriptValue subscripts[]) -> typename ACCUMULATOR::Type {
255 ACCUMULATOR accumulator{x};
256 SubscriptValue xAt[maxRank];
257 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
258 const auto &dim{x.GetDimension(zeroBasedDim)};
259 SubscriptValue at{dim.LowerBound()};
260 for (auto n{dim.Extent()}; n-- > 0; ++at) {
261 xAt[zeroBasedDim] = at;
262 if (!accumulator.AccumulateAt(xAt)) {
263 break;
266 return accumulator.Result();
269 template <LogicalReduction REDUCTION> struct LogicalReduceHelper {
270 template <int KIND> struct Functor {
271 void operator()(Descriptor &result, const Descriptor &x, int dim,
272 Terminator &terminator, const char *intrinsic) const {
273 // Standard requires result to have same LOGICAL kind as argument.
274 CreatePartialReductionResult(
275 result, x, x.ElementBytes(), dim, terminator, intrinsic, x.type());
276 SubscriptValue at[maxRank];
277 result.GetLowerBounds(at);
278 INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
279 using CppType = CppTypeFor<TypeCategory::Logical, KIND>;
280 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
281 *result.Element<CppType>(at) =
282 ReduceLogicalDimToScalar<LogicalAccumulator<REDUCTION>>(
283 x, dim - 1, at);
289 template <LogicalReduction REDUCTION>
290 inline void DoReduceLogicalDimension(Descriptor &result, const Descriptor &x,
291 int dim, Terminator &terminator, const char *intrinsic) {
292 auto catKind{x.type().GetCategoryAndKind()};
293 RUNTIME_CHECK(terminator, catKind && catKind->first == TypeCategory::Logical);
294 ApplyLogicalKind<LogicalReduceHelper<REDUCTION>::template Functor, void>(
295 catKind->second, terminator, result, x, dim, terminator, intrinsic);
298 // COUNT
300 class CountAccumulator {
301 public:
302 using Type = std::int64_t;
303 explicit CountAccumulator(const Descriptor &array) : array_{array} {}
304 void Reinitialize() { result_ = 0; }
305 Type Result() const { return result_; }
306 template <typename IGNORED = void>
307 bool AccumulateAt(const SubscriptValue at[]) {
308 if (IsLogicalElementTrue(array_, at)) {
309 ++result_;
311 return true;
314 private:
315 const Descriptor &array_;
316 Type result_{0};
319 template <int KIND> struct CountDimension {
320 void operator()(Descriptor &result, const Descriptor &x, int dim,
321 Terminator &terminator) const {
322 // Element size of the descriptor descriptor is the size
323 // of {TypeCategory::Integer, KIND}.
324 CreatePartialReductionResult(result, x,
325 Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, terminator,
326 "COUNT", TypeCode{TypeCategory::Integer, KIND});
327 SubscriptValue at[maxRank];
328 result.GetLowerBounds(at);
329 INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
330 using CppType = CppTypeFor<TypeCategory::Integer, KIND>;
331 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
332 *result.Element<CppType>(at) =
333 ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at);
338 extern "C" {
339 RT_EXT_API_GROUP_BEGIN
341 bool RTDEF(All)(const Descriptor &x, const char *source, int line, int dim) {
342 return GetTotalLogicalReduction(x, source, line, dim,
343 LogicalAccumulator<LogicalReduction::All>{x}, "ALL");
345 void RTDEF(AllDim)(Descriptor &result, const Descriptor &x, int dim,
346 const char *source, int line) {
347 Terminator terminator{source, line};
348 DoReduceLogicalDimension<LogicalReduction::All>(
349 result, x, dim, terminator, "ALL");
352 bool RTDEF(Any)(const Descriptor &x, const char *source, int line, int dim) {
353 return GetTotalLogicalReduction(x, source, line, dim,
354 LogicalAccumulator<LogicalReduction::Any>{x}, "ANY");
356 void RTDEF(AnyDim)(Descriptor &result, const Descriptor &x, int dim,
357 const char *source, int line) {
358 Terminator terminator{source, line};
359 DoReduceLogicalDimension<LogicalReduction::Any>(
360 result, x, dim, terminator, "ANY");
363 std::int64_t RTDEF(Count)(
364 const Descriptor &x, const char *source, int line, int dim) {
365 return GetTotalLogicalReduction(
366 x, source, line, dim, CountAccumulator{x}, "COUNT");
369 void RTDEF(CountDim)(Descriptor &result, const Descriptor &x, int dim, int kind,
370 const char *source, int line) {
371 Terminator terminator{source, line};
372 ApplyIntegerKind<CountDimension, void>(
373 kind, terminator, result, x, dim, terminator);
376 bool RTDEF(Parity)(const Descriptor &x, const char *source, int line, int dim) {
377 return GetTotalLogicalReduction(x, source, line, dim,
378 LogicalAccumulator<LogicalReduction::Parity>{x}, "PARITY");
380 void RTDEF(ParityDim)(Descriptor &result, const Descriptor &x, int dim,
381 const char *source, int line) {
382 Terminator terminator{source, line};
383 DoReduceLogicalDimension<LogicalReduction::Parity>(
384 result, x, dim, terminator, "PARITY");
387 RT_EXT_API_GROUP_END
388 } // extern "C"
389 } // namespace Fortran::runtime