2 * kmp_collapse.h -- header for loop collapse feature
5 //===----------------------------------------------------------------------===//
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
13 #ifndef KMP_COLLAPSE_H
14 #define KMP_COLLAPSE_H
16 #include <type_traits>
18 // Type of the index into the loop nest structures
19 // (valid values run from 0 up to, but not including, n from collapse(n)).
// Declared as an alias of kmp_int32, so indices are stored in that type.
20 typedef kmp_int32 kmp_index_t
;
22 // Type for the induction variable (IV) of the combined loop nest space.
// Declared as an alias of kmp_uint64. The trip_count members of the bounds
// structures in this header are declared with this type.
23 typedef kmp_uint64 kmp_loop_nest_iv_t
;
25 // Loop has <, <=, etc. as a comparison:
26 enum comparison_t
: kmp_int32
{
28 comp_greater_or_eq
= 1,
35 // Type of bounds and step, after usual promotions
36 // are a subset of these types (32 & 64 only):
37 enum loop_type_t
: kmp_int32
{
48 // Defining loop types to handle special cases
49 enum nested_loop_type_t
: kmp_int32
{
50 nested_loop_type_unkown
= 0,
51 nested_loop_type_lower_triangular_matrix
= 1,
52 nested_loop_type_upper_triangular_matrix
= 2
57 * Describes the structure for rectangular nested loops.
59 template <typename T
> struct bounds_infoXX_template
{
61 // typedef typename traits_t<T>::unsigned_t UT;
62 typedef typename traits_t
<T
>::signed_t ST
;
64 loop_type_t loop_type
; // The differentiator
65 loop_type_t loop_iv_type
;
66 comparison_t comparison
;
67 // outer_iv should be 0 (or any other value less than the number of dimensions)
68 // if loop doesn't depend on it (lb1 and ub1 will be 0).
69 // This way we can do multiplication without a check.
72 // unions to keep the size constant:
75 kmp_uint64 lb0_u64
; // real type can be signed
80 kmp_uint64 lb1_u64
; // real type can be signed
85 kmp_uint64 ub0_u64
; // real type can be signed
90 kmp_uint64 ub1_u64
; // real type can be signed
94 ST step
; // signed even if bounds type is unsigned
95 kmp_int64 step_64
; // signed
98 kmp_loop_nest_iv_t trip_count
;
102 @ingroup WORK_SHARING
103 * Interface struct for rectangular nested loops.
104 * Same size as bounds_infoXX_template.
106 struct bounds_info_t
{
108 loop_type_t loop_type
; // The differentiator
109 loop_type_t loop_iv_type
;
110 comparison_t comparison
;
111 // outer_iv should be 0 (or any other value less than the number of dimensions)
112 // if loop doesn't depend on it (lb1 and ub1 will be 0).
113 // This way we can do multiplication without a check.
114 kmp_index_t outer_iv
;
116 kmp_uint64 lb0_u64
; // real type can be signed
117 kmp_uint64 lb1_u64
; // real type can be signed
118 kmp_uint64 ub0_u64
; // real type can be signed
119 kmp_uint64 ub1_u64
; // real type can be signed
120 kmp_int64 step_64
; // signed
122 // This is internal, but it's the only internal thing we need
123 // in rectangular case, so let's expose it here:
124 kmp_loop_nest_iv_t trip_count
;
127 //-------------------------------------------------------------------------
128 // Additional types for internal representation:
130 // Array for a point in the loop space, in the original space.
131 // It's represented in kmp_uint64, but each dimension is calculated in
132 // that loop's IV type. Also, dimensions have to be converted to those types
133 // when used in generated code.
// Note: this is a plain pointer typedef (kmp_uint64 *); the number of
// elements is not part of the type.
134 typedef kmp_uint64
*kmp_point_t
;
136 // Array: number of loop iterations on each nesting level needed to reach
137 // some point, in expanded space or in original space.
138 // OMPTODO: move from using iterations to using offsets (iterations multiplied
139 // by steps). For those we need to be careful with the types, as step can be
140 // negative, but it'll remove multiplications and divisions in several places.
// Note: this is a plain pointer typedef (kmp_loop_nest_iv_t *); the number of
// elements is not part of the type.
141 typedef kmp_loop_nest_iv_t
*kmp_iterations_t
;
143 // Internal struct with additional info:
144 template <typename T
> struct bounds_info_internalXX_template
{
146 // OMPTODO: should span have type T or should it better be
147 // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 then
148 // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to
149 // use big_span_t, if it can be resolved at compile time.
151 typename
std::conditional
<std::is_signed
<T
>::value
, kmp_int64
, kmp_uint64
>
154 // typedef typename big_span_t span_t;
157 bounds_infoXX_template
<T
> b
; // possibly adjusted bounds
159 // Leaving this as a union in case we'll switch to span_t with different sizes
162 // Smallest possible value of iv (may be smaller than actually possible)
163 span_t span_smallest
;
164 kmp_uint64 span_smallest_u64
;
167 // Leaving this as a union in case we'll switch to span_t with different sizes
170 // Biggest possible value of iv (may be bigger than actually possible)
172 kmp_uint64 span_biggest_u64
;
175 // Did we adjust loop bounds (not counting canonicalization)?
176 bool loop_bounds_adjusted
;
179 // Internal struct with additional info:
180 struct bounds_info_internal_t
{
182 bounds_info_t b
; // possibly adjusted bounds
184 // Smallest possible value of iv (may be smaller than actually possible)
185 kmp_uint64 span_smallest_u64
;
187 // Biggest possible value of iv (may be bigger than actually possible)
188 kmp_uint64 span_biggest_u64
;
190 // Did we adjust loop bounds (not counting canonicalization)?
191 bool loop_bounds_adjusted
;
194 //----------APIs for rectangular loop nests--------------------------------
196 // Canonicalize loop nest and calculate overall trip count.
197 // "bounds_nest" has to be allocated per thread.
198 // API will modify original bounds_nest array to bring it to a canonical form
199 // (only <= and >=, no !=, <, >). If the original loop nest was already in a
200 // canonical form there will be no changes to bounds in bounds_nest array
201 // (only trip counts will be calculated).
202 // Returns trip count of overall space.
203 extern "C" kmp_loop_nest_iv_t
204 __kmpc_process_loop_nest_rectang(ident_t
*loc
, kmp_int32 gtid
,
205 /*in/out*/ bounds_info_t
*original_bounds_nest
,
208 // Calculate old induction variables corresponding to overall new_iv.
209 // Note: original IV will be returned as if it had kmp_uint64 type,
210 // will have to be converted to original type in user code.
211 // Note: trip counts should be already calculated by
212 // __kmpc_process_loop_nest_rectang.
213 // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
214 // that into user code.
216 __kmpc_calc_original_ivs_rectang(ident_t
*loc
, kmp_loop_nest_iv_t new_iv
,
217 const bounds_info_t
*original_bounds_nest
,
218 /*out*/ kmp_uint64
*original_ivs
,
221 //----------Init API for non-rectangular loops--------------------------------
223 // Init API for collapsed loops (static, no chunks defined).
224 // "bounds_nest" has to be allocated per thread.
225 // API will modify original bounds_nest array to bring it to a canonical form
226 // (only <= and >=, no !=, <, >). If the original loop nest was already in a
227 // canonical form there will be no changes to bounds in bounds_nest array
228 // (only trip counts will be calculated). Internally API will expand the space
229 // to parallelogram/parallelepiped, calculate total, calculate bounds for the
230 // chunks in terms of the new IV, re-calc them in terms of old IVs (especially
231 // important on the left side, to hit the lower bounds and not step over), and
232 // pick the correct chunk for this thread (so it will calculate chunks up to the
233 // needed one). It could be optimized to calculate just this chunk, potentially
234 // a bit less well distributed among threads. It is designed to make sure that
235 // threads will receive predictable chunks, deterministically (so that next nest
236 // of loops with similar characteristics will get exactly the same chunks on the same threads).
238 // Current contract: chunk_bounds_nest has only lb0 and ub0,
239 // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
241 __kmpc_for_collapsed_init(ident_t
*loc
, kmp_int32 gtid
,
242 /*in/out*/ bounds_info_t
*original_bounds_nest
,
243 /*out*/ bounds_info_t
*chunk_bounds_nest
,
245 /*out*/ kmp_int32
*plastiter
);
247 #endif // KMP_COLLAPSE_H