openmp/runtime/src/kmp_collapse.h

   1 /*
   2  * kmp_collapse.h -- header for loop collapse feature
   3  */
   4
   5 //===----------------------------------------------------------------------===//
   6 //
   7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   8 // See https://llvm.org/LICENSE.txt for license information.
   9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #ifndef KMP_COLLAPSE_H
  14 #define KMP_COLLAPSE_H
  15
  16 #include <type_traits>
  17
  18 // Type of the index into the loop nest structures
  19 // (with values from 0 to less than n from collapse(n))
  20 typedef kmp_int32 kmp_index_t;
  21
  22 // Type for combined loop nest space IV:
  23 typedef kmp_uint64 kmp_loop_nest_iv_t;
  24
  25 // Loop has <, <=, etc. as a comparison:
  26 enum comparison_t : kmp_int32 {
  27   comp_less_or_eq = 0,
  28   comp_greater_or_eq = 1,
  29   comp_not_eq = 2,
  30   comp_less = 3,
  31   comp_greater = 4
  32 };
  33
  34 // Type of loop IV.
  35 // Type of bounds and step, after usual promotions
  36 // are a subset of these types (32 & 64 only):
  37 enum loop_type_t : kmp_int32 {
  38   loop_type_uint8 = 0,
  39   loop_type_int8 = 1,
  40   loop_type_uint16 = 2,
  41   loop_type_int16 = 3,
  42   loop_type_uint32 = 4,
  43   loop_type_int32 = 5,
  44   loop_type_uint64 = 6,
  45   loop_type_int64 = 7
  46 };
  47
  48 // Defining loop types to handle special cases
  49 enum nested_loop_type_t : kmp_int32 {
  50   nested_loop_type_unkown = 0,
  51   nested_loop_type_lower_triangular_matrix = 1,
  52   nested_loop_type_upper_triangular_matrix = 2
  53 };
  54
  55 /*!
  56  @ingroup WORK_SHARING
  57  * Describes the structure for rectangular nested loops.
  58  */
  59 template <typename T> struct bounds_infoXX_template {
  60
  61   // typedef typename traits_t<T>::unsigned_t UT;
  62   typedef typename traits_t<T>::signed_t ST;
  63
  64   loop_type_t loop_type; // The differentiator
  65   loop_type_t loop_iv_type;
  66   comparison_t comparison;
  67   // outer_iv should be 0 (or any other less then number of dimentions)
  68   // if loop doesn't depend on it (lb1 and ub1 will be 0).
  69   // This way we can do multiplication without a check.
  70   kmp_index_t outer_iv;
  71
  72   // unions to keep the size constant:
  73   union {
  74     T lb0;
  75     kmp_uint64 lb0_u64; // real type can be signed
  76   };
  77
  78   union {
  79     T lb1;
  80     kmp_uint64 lb1_u64; // real type can be signed
  81   };
  82
  83   union {
  84     T ub0;
  85     kmp_uint64 ub0_u64; // real type can be signed
  86   };
  87
  88   union {
  89     T ub1;
  90     kmp_uint64 ub1_u64; // real type can be signed
  91   };
  92
  93   union {
  94     ST step; // signed even if bounds type is unsigned
  95     kmp_int64 step_64; // signed
  96   };
  97
  98   kmp_loop_nest_iv_t trip_count;
  99 };
 100
 101 /*!
 102  @ingroup WORK_SHARING
 103  * Interface struct for rectangular nested loops.
 104  * Same size as bounds_infoXX_template.
 105  */
 106 struct bounds_info_t {
 107
 108   loop_type_t loop_type; // The differentiator
 109   loop_type_t loop_iv_type;
 110   comparison_t comparison;
 111   // outer_iv should be 0  (or any other less then number of dimentions)
 112   // if loop doesn't depend on it (lb1 and ub1 will be 0).
 113   // This way we can do multiplication without a check.
 114   kmp_index_t outer_iv;
 115
 116   kmp_uint64 lb0_u64; // real type can be signed
 117   kmp_uint64 lb1_u64; // real type can be signed
 118   kmp_uint64 ub0_u64; // real type can be signed
 119   kmp_uint64 ub1_u64; // real type can be signed
 120   kmp_int64 step_64; // signed
 121
 122   // This is internal, but it's the only internal thing we need
 123   // in rectangular case, so let's expose it here:
 124   kmp_loop_nest_iv_t trip_count;
 125 };
 126
 127 //-------------------------------------------------------------------------
 128 // Additional types for internal representation:
 129
 130 // Array for a point in the loop space, in the original space.
 131 // It's represented in kmp_uint64, but each dimention is calculated in
 132 // that loop IV type. Also dimentions have to be converted to those types
 133 // when used in generated code.
 134 typedef kmp_uint64 *kmp_point_t;
 135
 136 // Array: Number of loop iterations on each nesting level to achieve some point,
 137 // in expanded space or in original space.
 138 // OMPTODO: move from using iterations to using offsets (iterations multiplied
 139 // by steps). For those we need to be careful with the types, as step can be
 140 // negative, but it'll remove multiplications and divisions in several places.
 141 typedef kmp_loop_nest_iv_t *kmp_iterations_t;
 142
 143 // Internal struct with additional info:
 144 template <typename T> struct bounds_info_internalXX_template {
 145
 146   // OMPTODO: should span have type T or should it better be
 147   // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than
 148   // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to
 149   // use big_span_t, if it can be resolved at compile time.
 150   typedef
 151       typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
 152           big_span_t;
 153
 154   // typedef typename big_span_t span_t;
 155   typedef T span_t;
 156
 157   bounds_infoXX_template<T> b; // possibly adjusted bounds
 158
 159   // Leaving this as a union in case we'll switch to span_t with different sizes
 160   // (depending on T)
 161   union {
 162     // Smallest possible value of iv (may be smaller than actually possible)
 163     span_t span_smallest;
 164     kmp_uint64 span_smallest_u64;
 165   };
 166
 167   // Leaving this as a union in case we'll switch to span_t with different sizes
 168   // (depending on T)
 169   union {
 170     // Biggest possible value of iv (may be bigger than actually possible)
 171     span_t span_biggest;
 172     kmp_uint64 span_biggest_u64;
 173   };
 174
 175   // Did we adjust loop bounds (not counting canonicalization)?
 176   bool loop_bounds_adjusted;
 177 };
 178
 179 // Internal struct with additional info:
 180 struct bounds_info_internal_t {
 181
 182   bounds_info_t b; // possibly adjusted bounds
 183
 184   // Smallest possible value of iv (may be smaller than actually possible)
 185   kmp_uint64 span_smallest_u64;
 186
 187   // Biggest possible value of iv (may be bigger than actually possible)
 188   kmp_uint64 span_biggest_u64;
 189
 190   // Did we adjust loop bounds (not counting canonicalization)?
 191   bool loop_bounds_adjusted;
 192 };
 193
 194 //----------APIs for rectangular loop nests--------------------------------
 195
 196 // Canonicalize loop nest and calculate overall trip count.
 197 // "bounds_nest" has to be allocated per thread.
 198 // API will modify original bounds_nest array to bring it to a canonical form
 199 // (only <= and >=, no !=, <, >). If the original loop nest was already in a
 200 // canonical form there will be no changes to bounds in bounds_nest array
 201 // (only trip counts will be calculated).
 202 // Returns trip count of overall space.
 203 extern "C" kmp_loop_nest_iv_t
 204 __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
 205                                  /*in/out*/ bounds_info_t *original_bounds_nest,
 206                                  kmp_index_t n);
 207
 208 // Calculate old induction variables corresponding to overall new_iv.
 209 // Note: original IV will be returned as if it had kmp_uint64 type,
 210 // will have to be converted to original type in user code.
 211 // Note: trip counts should be already calculated by
 212 // __kmpc_process_loop_nest_rectang.
 213 // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
 214 // that into user code.
 215 extern "C" void
 216 __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
 217                                  const bounds_info_t *original_bounds_nest,
 218                                  /*out*/ kmp_uint64 *original_ivs,
 219                                  kmp_index_t n);
 220
 221 //----------Init API for non-rectangular loops--------------------------------
 222
 223 // Init API for collapsed loops (static, no chunks defined).
 224 // "bounds_nest" has to be allocated per thread.
 225 // API will modify original bounds_nest array to bring it to a canonical form
 226 // (only <= and >=, no !=, <, >). If the original loop nest was already in a
 227 // canonical form there will be no changes to bounds in bounds_nest array
 228 // (only trip counts will be calculated). Internally API will expand the space
 229 // to parallelogram/parallelepiped, calculate total, calculate bounds for the
 230 // chunks in terms of the new IV, re-calc them in terms of old IVs (especially
 231 // important on the left side, to hit the lower bounds and not step over), and
 232 // pick the correct chunk for this thread (so it will calculate chunks up to the
 233 // needed one). It could be optimized to calculate just this chunk, potentially
 234 // a bit less well distributed among threads. It is designed to make sure that
 235 // threads will receive predictable chunks, deterministically (so that next nest
 236 // of loops with similar characteristics will get exactly same chunks on same
 237 // threads).
 238 // Current contract: chunk_bounds_nest has only lb0 and ub0,
 239 // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
 240 extern "C" kmp_int32
 241 __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
 242                           /*in/out*/ bounds_info_t *original_bounds_nest,
 243                           /*out*/ bounds_info_t *chunk_bounds_nest,
 244                           kmp_index_t n,
 245                           /*out*/ kmp_int32 *plastiter);
 246
 247 #endif // KMP_COLLAPSE_H