2 * Copyright 2013 Ecole Normale Superieure
3 * Copyright 2015 Sven Verdoolaege
5 * Use of this software is governed by the MIT license
7 * Written by Sven Verdoolaege,
8 * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
13 #include <isl/space.h>
14 #include <isl/constraint.h>
19 #include <isl/union_set.h>
20 #include <isl/union_map.h>
25 /* The hybrid tiling implemented in this file is based on
26 * Grosser et al., "Hybrid Hexagonal/Classical Tiling for GPUs".
29 /* Bounds on relative dependence distances in input to hybrid tiling.
30 * upper is an upper bound on the relative dependence distances
31 * in the first space dimension
32 * -lower is a lower bound on the relative dependence distances
33 * in all space dimensions.
41 * for each dependence distance vector d, where d_1 is the component
42 * corresponding to the first space dimension.
44 * upper and lower are always non-negative.
45 * Some of the values may be NaN if no bound could be found.
47 struct ppcg_ht_bounds
{
52 /* Free "bounds" along with all its fields.
54 __isl_null ppcg_ht_bounds
*ppcg_ht_bounds_free(
55 __isl_take ppcg_ht_bounds
*bounds
)
59 isl_val_free(bounds
->upper
);
60 isl_multi_val_free(bounds
->lower
);
66 /* Create a ppcg_ht_bounds object for a band living in "space".
67 * The bounds are initialized to NaN.
69 __isl_give ppcg_ht_bounds
*ppcg_ht_bounds_alloc(__isl_take isl_space
*space
)
73 ppcg_ht_bounds
*bounds
;
78 ctx
= isl_space_get_ctx(space
);
79 bounds
= isl_alloc_type(ctx
, struct ppcg_ht_bounds
);
82 bounds
->upper
= isl_val_nan(ctx
);
83 bounds
->lower
= isl_multi_val_zero(space
);
84 n
= isl_multi_val_dim(bounds
->lower
, isl_dim_set
);
85 for (i
= 0; i
< n
; ++i
) {
86 isl_val
*v
= isl_val_copy(bounds
->upper
);
87 bounds
->lower
= isl_multi_val_set_val(bounds
->lower
, i
, v
);
90 if (!bounds
->lower
|| !bounds
->upper
)
91 return ppcg_ht_bounds_free(bounds
);
95 isl_space_free(space
);
99 void ppcg_ht_bounds_dump(__isl_keep ppcg_ht_bounds
*bounds
)
104 fprintf(stderr
, "lower: ");
105 isl_multi_val_dump(bounds
->lower
);
106 fprintf(stderr
, "upper: ");
107 isl_val_dump(bounds
->upper
);
110 /* Return the upper bound on the relative dependence distances
111 * in the first space dimension.
113 __isl_give isl_val
*ppcg_ht_bounds_get_upper(__isl_keep ppcg_ht_bounds
*bounds
)
117 return isl_val_copy(bounds
->upper
);
120 /* Replace the upper bound on the relative dependence distances
121 * in the first space dimension by "upper".
123 __isl_give ppcg_ht_bounds
*ppcg_ht_bounds_set_upper(
124 __isl_take ppcg_ht_bounds
*bounds
, __isl_take isl_val
*upper
)
126 if (!bounds
|| !upper
)
128 isl_val_free(bounds
->upper
);
129 bounds
->upper
= upper
;
132 ppcg_ht_bounds_free(bounds
);
137 /* Return the lower bound on the relative dependence distances
138 * in space dimension "pos".
140 __isl_give isl_val
*ppcg_ht_bounds_get_lower(__isl_keep ppcg_ht_bounds
*bounds
,
145 return isl_multi_val_get_val(bounds
->lower
, pos
);
148 /* Replace the lower bound on the relative dependence distances
149 * in space dimension "pos" by "lower".
151 __isl_give ppcg_ht_bounds
*ppcg_ht_bounds_set_lower(
152 __isl_take ppcg_ht_bounds
*bounds
, int pos
, __isl_take isl_val
*lower
)
154 if (!bounds
|| !lower
)
156 bounds
->lower
= isl_multi_val_set_val(bounds
->lower
, pos
, lower
);
158 return ppcg_ht_bounds_free(bounds
);
161 ppcg_ht_bounds_free(bounds
);
166 /* Can the bounds on relative dependence distances recorded in "bounds"
167 * be used to perform hybrid tiling?
168 * In particular, have appropriate lower and upper bounds been found?
169 * Any NaN indicates that no corresponding bound was found.
171 isl_bool
ppcg_ht_bounds_is_valid(__isl_keep ppcg_ht_bounds
*bounds
)
177 return isl_bool_error
;
178 is_nan
= isl_val_is_nan(bounds
->upper
);
180 return isl_bool_error
;
182 return isl_bool_false
;
184 n
= isl_multi_val_dim(bounds
->lower
, isl_dim_set
);
185 for (i
= 0; i
< n
; ++i
) {
188 v
= isl_multi_val_get_val(bounds
->lower
, i
);
189 is_nan
= isl_val_is_nan(v
);
191 return isl_bool_error
;
193 return isl_bool_false
;
197 return isl_bool_true
;
200 /* Structure that represents the basic hexagonal tiling,
201 * along with information that is needed to perform the hybrid tiling.
203 * "bounds" are the bounds on the dependence distances that
204 * define the hexagonal shape and the required skewing in the remaining
207 * "input_node" points to the input pair of band nodes.
208 * "input_schedule" is the partial schedule of this input pair of band nodes.
209 * The space of this schedule is [P -> C], where P is the space
210 * of the parent node and C is the space of the child node.
212 * "space_sizes" represent the total size of a tile for the space
213 * dimensions, i.e., those corresponding to the child node.
214 * The space of "space_sizes" is C.
215 * If S_0 is the original tile size in the first space dimension,
216 * then the first entry of "space_sizes" is equal to
217 * W = 2*S_0 + floor(d_l h) + floor(d_u h).
218 * The remaining entries are the same as in the original tile sizes.
220 * The basic hexagonal tiling "hex" is defined
221 * in a "ts" (time-space) space and corresponds to the phase-1 tiles.
222 * "time_tile" maps the "ts" space to outer time tile.
223 * Is is equal to ts[t, s] -> floor(t/(2 * S_t)), with S_t the original tile
224 * size corresponding to the parent node.
225 * "local_time" maps the "ts" space to the time dimension inside each tile.
226 * It is equal to ts[t, s] -> t mod (2 S_t), with S_t the original tile
227 * size corresponding to the parent node.
228 * "shift_space" shifts the tiles at time tile T = floor(t/(2 S_t))
229 * in the space dimension such that they align to a multiple of W.
230 * It is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W,
231 * with shift_s = S_0 + floor(d_u h).
232 * "shift_phase" is the shift taken to go from phase 0 to phase 1
233 * It is equal to ts[t, s] -> ts[t + S_t, s + shift_s],
234 * with shift_s = S_0 + floor(d_u h).
236 * "project_ts" projects the space of the input schedule to the ts-space.
237 * It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0].
239 struct ppcg_ht_tiling
{
242 ppcg_ht_bounds
*bounds
;
243 isl_schedule_node
*input_node
;
244 isl_multi_union_pw_aff
*input_schedule
;
246 isl_multi_val
*space_sizes
;
250 isl_aff
*shift_space
;
251 isl_multi_aff
*shift_phase
;
254 isl_multi_aff
*project_ts
;
256 typedef struct ppcg_ht_tiling ppcg_ht_tiling
;
258 /* Return the space of the pair of band nodes that form the input
259 * to the hybrid tiling.
260 * In particular, return the space [P -> C], where P is the space
261 * of the parent node and C is the space of the child node.
263 __isl_give isl_space
*ppcg_ht_tiling_get_input_space(
264 __isl_keep ppcg_ht_tiling
*tile
)
269 return isl_multi_union_pw_aff_get_space(tile
->input_schedule
);
272 /* Remove a reference to "tile" and free "tile" along with all its fields
273 * as soon as the reference count drops to zero.
275 static __isl_null ppcg_ht_tiling
*ppcg_ht_tiling_free(
276 __isl_take ppcg_ht_tiling
*tiling
)
280 if (--tiling
->ref
> 0)
283 ppcg_ht_bounds_free(tiling
->bounds
);
284 isl_schedule_node_free(tiling
->input_node
);
285 isl_multi_union_pw_aff_free(tiling
->input_schedule
);
286 isl_multi_val_free(tiling
->space_sizes
);
287 isl_aff_free(tiling
->time_tile
);
288 isl_aff_free(tiling
->local_time
);
289 isl_aff_free(tiling
->shift_space
);
290 isl_multi_aff_free(tiling
->shift_phase
);
291 isl_set_free(tiling
->hex
);
292 isl_multi_aff_free(tiling
->project_ts
);
298 /* Return a new reference to "tiling".
300 __isl_give ppcg_ht_tiling
*ppcg_ht_tiling_copy(
301 __isl_keep ppcg_ht_tiling
*tiling
)
310 /* Return the isl_ctx to which "tiling" belongs.
312 isl_ctx
*ppcg_ht_tiling_get_ctx(__isl_keep ppcg_ht_tiling
*tiling
)
317 return isl_multi_union_pw_aff_get_ctx(tiling
->input_schedule
);
320 /* Representation of one of the two phases of hybrid tiling.
322 * "tiling" points to the shared tiling data.
324 * "time_tile", "local_time" and "shift_space" are equal to the corresponding
325 * fields of "tiling", pulled back to the input space.
326 * In case of phase 0, these expressions have also been moved
327 * from phase 1 to phase 0.
329 * "domain" contains the hexagonal tiling of this phase.
331 * "space_shift" is the shift that should be added to the space band
332 * in order to be able to apply rectangular tiling to the space.
333 * For phase 1, it is equal to
335 * [P[t] -> C[s_0, s_i]] -> C[(-(2 * shift_s)*T) % W, dl_i * u]
337 * with shift_s = S_0 + floor(d_u h),
338 * T equal to "time_tile" and u equal to "local_time".
339 * For phase 0, it is equal to
341 * [P[t] -> C[s_0, s_i]] -> C[shift_s + (-(2 * shift_s)*T) % W, dl_i * u]
343 * "space_tile" is the space tiling. It is equal to
345 * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size]
347 struct ppcg_ht_phase
{
348 ppcg_ht_tiling
*tiling
;
352 isl_aff
*shift_space
;
355 isl_multi_aff
*space_shift
;
356 isl_multi_aff
*space_tile
;
359 /* Free "phase" along with all its fields.
361 static __isl_null ppcg_ht_phase
*ppcg_ht_phase_free(
362 __isl_take ppcg_ht_phase
*phase
)
367 ppcg_ht_tiling_free(phase
->tiling
);
368 isl_aff_free(phase
->time_tile
);
369 isl_aff_free(phase
->local_time
);
370 isl_aff_free(phase
->shift_space
);
371 isl_set_free(phase
->domain
);
372 isl_multi_aff_free(phase
->space_shift
);
373 isl_multi_aff_free(phase
->space_tile
);
379 /* Wrapper around ppcg_ht_phase_free for use as an argument
380 * to isl_id_set_free_user.
382 static void ppcg_ht_phase_free_wrap(void *user
)
384 ppcg_ht_phase
*phase
= user
;
386 ppcg_ht_phase_free(phase
);
389 /* Return the domain of hybrid tiling phase "phase".
391 static __isl_give isl_set
*ppcg_ht_phase_get_domain(ppcg_ht_phase
*phase
)
396 return isl_set_copy(phase
->domain
);
399 /* Return the space of the pair of band nodes that form the input
400 * to the hybrid tiling of which "phase" is a phase.
401 * In particular, return the space [P -> C], where P is the space
402 * of the parent node and C is the space of the child node.
404 static __isl_give isl_space
*ppcg_ht_phase_get_input_space(
405 __isl_keep ppcg_ht_phase
*phase
)
410 return ppcg_ht_tiling_get_input_space(phase
->tiling
);
413 /* Construct the lower left constraint of the hexagonal tile, i.e.,
415 * du a - b <= (2h+1) du - duh
416 * -du a + b + (2h+1) du - duh >= 0
418 * where duh = floor(du * h).
420 * This constraint corresponds to (6) in
421 * "Hybrid Hexagonal/Classical Tiling for GPUs".
423 static __isl_give isl_constraint
*hex_lower_left(__isl_take isl_local_space
*ls
,
424 __isl_keep isl_val
*h
, __isl_keep isl_val
*du
, __isl_keep isl_val
*duh
)
429 v
= isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h
), 2), 1);
430 v
= isl_val_mul(v
, isl_val_copy(du
));
431 v
= isl_val_sub(v
, isl_val_copy(duh
));
432 aff
= isl_aff_val_on_domain(ls
, v
);
433 v
= isl_val_neg(isl_val_copy(du
));
434 aff
= isl_aff_set_coefficient_val(aff
, isl_dim_in
, 0, v
);
435 aff
= isl_aff_set_coefficient_si(aff
, isl_dim_in
, 1, 1);
437 return isl_inequality_from_aff(aff
);
440 /* Construct the lower constraint of the hexagonal tile, i.e.,
445 * This constraint corresponds to (7) in
446 * "Hybrid Hexagonal/Classical Tiling for GPUs".
448 static __isl_give isl_constraint
*hex_lower(__isl_take isl_local_space
*ls
,
449 __isl_keep isl_val
*h
)
454 v
= isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h
), 2), 1);
455 aff
= isl_aff_val_on_domain(ls
, v
);
456 aff
= isl_aff_set_coefficient_si(aff
, isl_dim_in
, 0, -1);
458 return isl_inequality_from_aff(aff
);
461 /* Construct the lower right constraint of the hexagonal tile, i.e.,
463 * dl a + b <= (2h+1) dl + duh + (s0-1)
464 * -dl a - b + (2h+1) dl + duh + (s0-1) >= 0
466 * where duh = floor(du * h).
468 * This constraint corresponds to (8) in
469 * "Hybrid Hexagonal/Classical Tiling for GPUs".
471 static __isl_give isl_constraint
*hex_lower_right(
472 __isl_take isl_local_space
*ls
, __isl_keep isl_val
*h
,
473 __isl_keep isl_val
*s0
, __isl_keep isl_val
*dl
, __isl_keep isl_val
*duh
)
478 v
= isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h
), 2), 1);
479 v
= isl_val_mul(v
, isl_val_copy(dl
));
480 v
= isl_val_add(v
, isl_val_copy(duh
));
481 v
= isl_val_add(v
, isl_val_copy(s0
));
482 v
= isl_val_sub_ui(v
, 1);
483 aff
= isl_aff_val_on_domain(ls
, v
);
484 v
= isl_val_neg(isl_val_copy(dl
));
485 aff
= isl_aff_set_coefficient_val(aff
, isl_dim_in
, 0, v
);
486 aff
= isl_aff_set_coefficient_si(aff
, isl_dim_in
, 1, -1);
488 return isl_inequality_from_aff(aff
);
491 /* Construct the upper left constraint of the hexagonal tile, i.e.,
493 * dl a + b >= h dl - (d - 1)/d with d = den(dl)
494 * dl a + b - h dl + (d - 1)/d >= 0
496 * This constraint corresponds to (10) in
497 * "Hybrid Hexagonal/Classical Tiling for GPUs".
499 static __isl_give isl_constraint
*hex_upper_left(__isl_take isl_local_space
*ls
,
500 __isl_keep isl_val
*h
, __isl_keep isl_val
*dl
)
505 d
= isl_val_get_den_val(dl
);
506 v
= isl_val_sub_ui(isl_val_copy(d
), 1);
507 v
= isl_val_div(v
, d
);
508 v
= isl_val_sub(v
, isl_val_mul(isl_val_copy(h
), isl_val_copy(dl
)));
509 aff
= isl_aff_val_on_domain(ls
, v
);
510 aff
= isl_aff_set_coefficient_val(aff
, isl_dim_in
, 0, isl_val_copy(dl
));
511 aff
= isl_aff_set_coefficient_si(aff
, isl_dim_in
, 1, 1);
513 return isl_inequality_from_aff(aff
);
516 /* Construct the upper right constraint of the hexagonal tile, i.e.,
518 * du a - b >= du h - duh - (s0-1) - dlh - (d - 1)/d with d = den(du)
519 * du a - b - du h + duh + (s0-1) + dlh + (d - 1)/d >= 0
521 * where dlh = floor(dl * h) and duh = floor(du * h).
523 * This constraint corresponds to (12) in
524 * "Hybrid Hexagonal/Classical Tiling for GPUs".
526 static __isl_give isl_constraint
*hex_upper_right(
527 __isl_take isl_local_space
*ls
, __isl_keep isl_val
*h
,
528 __isl_keep isl_val
*s0
, __isl_keep isl_val
*du
,
529 __isl_keep isl_val
*dlh
, __isl_keep isl_val
*duh
)
534 d
= isl_val_get_den_val(du
);
535 v
= isl_val_sub_ui(isl_val_copy(d
), 1);
536 v
= isl_val_div(v
, d
);
537 v
= isl_val_sub(v
, isl_val_mul(isl_val_copy(h
), isl_val_copy(du
)));
538 v
= isl_val_add(v
, isl_val_copy(duh
));
539 v
= isl_val_add(v
, isl_val_copy(dlh
));
540 v
= isl_val_add(v
, isl_val_copy(s0
));
541 v
= isl_val_sub_ui(v
, 1);
542 aff
= isl_aff_val_on_domain(ls
, v
);
543 aff
= isl_aff_set_coefficient_val(aff
, isl_dim_in
, 0, isl_val_copy(du
));
544 aff
= isl_aff_set_coefficient_si(aff
, isl_dim_in
, 1, -1);
546 return isl_inequality_from_aff(aff
);
549 /* Construct the uppper constraint of the hexagonal tile, i.e.,
553 * This constraint corresponds to (13) in
554 * "Hybrid Hexagonal/Classical Tiling for GPUs".
556 static __isl_give isl_constraint
*hex_upper(__isl_take isl_local_space
*ls
)
560 aff
= isl_aff_var_on_domain(ls
, isl_dim_set
, 0);
562 return isl_inequality_from_aff(aff
);
565 /* Construct the basic hexagonal tile shape.
566 * "space" is the 2D space in which the hexagon should be constructed.
567 * h is st-1, with st the tile size in the time dimension
568 * s0 is the tile size in the space dimension
569 * dl is a bound on the negative relative dependence distances, i.e.,
573 * du is a bound on the positive relative dependence distances, i.e.,
577 * with (d_t,d_s) any dependence distance vector.
578 * dlh = floor(dl * h)
579 * duh = floor(du * h)
581 * The shape of the hexagon is as follows:
593 * duh+s0-1+dlh+1+s0+1
595 * The next hexagon is shifted by duh + dlh + 2 * s0.
597 * The slope of the "/" constraints is dl.
598 * The slope of the "\_" constraints is du.
600 static __isl_give isl_set
*compute_hexagon(__isl_take isl_space
*space
,
601 __isl_keep isl_val
*h
, __isl_keep isl_val
*s0
,
602 __isl_keep isl_val
*dl
, __isl_keep isl_val
*du
,
603 __isl_keep isl_val
*dlh
, __isl_keep isl_val
*duh
)
609 ls
= isl_local_space_from_space(space
);
611 c
= hex_lower_left(isl_local_space_copy(ls
), h
, du
, duh
);
612 bset
= isl_basic_set_from_constraint(c
);
614 c
= hex_lower(isl_local_space_copy(ls
), h
);
615 bset
= isl_basic_set_add_constraint(bset
, c
);
617 c
= hex_lower_right(isl_local_space_copy(ls
), h
, s0
, dl
, duh
);
618 bset
= isl_basic_set_add_constraint(bset
, c
);
620 c
= hex_upper_left(isl_local_space_copy(ls
), h
, dl
);
621 bset
= isl_basic_set_add_constraint(bset
, c
);
623 c
= hex_upper_right(isl_local_space_copy(ls
), h
, s0
, du
, dlh
, duh
);
624 bset
= isl_basic_set_add_constraint(bset
, c
);
627 bset
= isl_basic_set_add_constraint(bset
, c
);
629 return isl_set_from_basic_set(bset
);
632 /* Name of the ts-space.
634 static const char *ts_space_name
= "ts";
636 /* Construct and return the space ts[t, s].
638 static __isl_give isl_space
*construct_ts_space(isl_ctx
*ctx
)
642 s
= isl_space_set_alloc(ctx
, 0, 2);
643 s
= isl_space_set_tuple_name(s
, isl_dim_set
, ts_space_name
);
648 /* Name of the local ts-space.
650 static const char *local_ts_space_name
= "local_ts";
652 /* Construct and return the space local_ts[t, s].
654 static __isl_give isl_space
*construct_local_ts_space(isl_ctx
*ctx
)
658 s
= isl_space_set_alloc(ctx
, 0, 2);
659 s
= isl_space_set_tuple_name(s
, isl_dim_set
, local_ts_space_name
);
664 /* Compute the total size of a tile for the space dimensions,
665 * i.e., those corresponding to the child node
666 * of the input pattern.
667 * If S_0 is the original tile size in the first space dimension,
668 * then the first entry of "space_sizes" is equal to
669 * W = 2*S_0 + floor(d_l h) + floor(d_u h).
670 * The remaining entries are the same as in the original tile sizes.
671 * "tile_sizes" contains the original tile sizes, including
672 * the tile size corresponding to the parent node.
673 * "dlh" is equal to floor(d_l h).
674 * "duh" is equal to floor(d_u h).
676 static __isl_give isl_multi_val
*compute_space_sizes(
677 __isl_keep isl_multi_val
*tile_sizes
,
678 __isl_keep isl_val
*dlh
, __isl_keep isl_val
*duh
)
681 isl_multi_val
*space_sizes
;
683 space_sizes
= isl_multi_val_copy(tile_sizes
);
684 space_sizes
= isl_multi_val_factor_range(space_sizes
);
685 size
= isl_multi_val_get_val(space_sizes
, 0);
686 size
= isl_val_mul_ui(size
, 2);
687 size
= isl_val_add(size
, isl_val_copy(duh
));
688 size
= isl_val_add(size
, isl_val_copy(dlh
));
689 space_sizes
= isl_multi_val_set_val(space_sizes
, 0, size
);
694 /* Compute the offset of phase 1 with respect to phase 0
695 * in the ts-space ("space").
696 * In particular, return
700 static __isl_give isl_multi_val
*compute_phase_shift(
701 __isl_keep isl_space
*space
, __isl_keep isl_val
*st
,
702 __isl_keep isl_val
*s0
, __isl_keep isl_val
*duh
)
705 isl_multi_val
*phase_shift
;
707 phase_shift
= isl_multi_val_zero(isl_space_copy(space
));
708 phase_shift
= isl_multi_val_set_val(phase_shift
, 0, isl_val_copy(st
));
709 v
= isl_val_add(isl_val_copy(duh
), isl_val_copy(s0
));
710 phase_shift
= isl_multi_val_set_val(phase_shift
, 1, v
);
715 /* Return the function
717 * ts[t, s] -> floor(t/(2 * st))
719 * representing the time tile.
720 * "space" is the space ts[t, s].
722 static __isl_give isl_aff
*compute_time_tile(__isl_keep isl_space
*space
,
723 __isl_keep isl_val
*st
)
729 ls
= isl_local_space_from_space(isl_space_copy(space
));
730 t
= isl_aff_var_on_domain(ls
, isl_dim_set
, 0);
731 v
= isl_val_mul_ui(isl_val_copy(st
), 2);
732 t
= isl_aff_floor(isl_aff_scale_down_val(t
, v
));
737 /* Compute a shift in the space dimension for tiles
738 * at time tile T = floor(t/(2 * S_t))
739 * such that they align to a multiple of the total space tile dimension W.
740 * In particular, compute
742 * ts[t, s] -> s + (-(2 * shift_s)*T) % W
744 * where shift_s is the shift of phase 1 with respect to phase 0
745 * in the space dimension (the first element of "phase_shift").
746 * W is stored in the first element of "space_sizes".
747 * "time_tile" is the function
749 * ts[t, s] -> floor(t/(2 * S_T))
751 * Since phase 1 is shifted by shift_s with respect to phase 0,
752 * the next line of phase 0 (at T+1) is shifted by 2*shift_s
753 * with respect to the previous line (at T).
754 * A shift of -(2 * shift_s)*T therefore allows the basic pattern
755 * (which starts at 0) to be applied.
756 * However, this shift will be used to obtain the tile coordinate
757 * in the first space dimension and if the original values
758 * in the space dimension are non-negative, then the shift should
759 * not make them negative. Moreover, the shift should be as minimal
761 * Since the pattern repeats itself with a period of W in the space
762 * dimension, the shift can be replaced by (-(2 * shift_s)*T) % W.
764 static __isl_give isl_aff
*compute_shift_space(__isl_keep isl_aff
*time_tile
,
765 __isl_keep isl_multi_val
*space_sizes
,
766 __isl_keep isl_multi_val
*phase_shift
)
772 ls
= isl_local_space_from_space(isl_aff_get_domain_space(time_tile
));
773 t
= isl_aff_copy(time_tile
);
774 v
= isl_val_mul_ui(isl_multi_val_get_val(phase_shift
, 1), 2);
776 t
= isl_aff_scale_val(t
, v
);
777 v
= isl_multi_val_get_val(space_sizes
, 0);
778 t
= isl_aff_mod_val(t
, v
);
779 s
= isl_aff_var_on_domain(ls
, isl_dim_set
, 1);
780 s
= isl_aff_add(s
, t
);
785 /* Give the phase_shift ts[S_t, S_0 + floor(d_u h)],
786 * compute a function that applies the shift, i.e.,
788 * ts[t, s] -> ts[t + S_t, s + S_0 + floor(d_u h)],
790 static __isl_give isl_multi_aff
*compute_shift_phase(
791 __isl_keep isl_multi_val
*phase_shift
)
794 isl_multi_aff
*shift
;
796 space
= isl_multi_val_get_space(phase_shift
);
797 shift
= isl_multi_aff_multi_val_on_space(space
,
798 isl_multi_val_copy(phase_shift
));
799 space
= isl_multi_aff_get_space(shift
);
800 shift
= isl_multi_aff_add(shift
, isl_multi_aff_identity(space
));
805 /* Compute a mapping from the ts-space to the local coordinates
806 * within each tile. In particular, compute
808 * ts[t, s] -> local_ts[t % (2 S_t), (s + (-(2 * shift_s)*T) % W) % W]
810 * "ts" is the space ts[t, s]
811 * "local_ts" is the space local_ts[t, s]
812 * "shift_space" is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W
813 * "st" is the tile size in the time dimension S_t.
814 * The first element of "space_sizes" is equal to W.
816 static __isl_give isl_multi_aff
*compute_localize(
817 __isl_keep isl_space
*local_ts
, __isl_keep isl_aff
*shift_space
,
818 __isl_keep isl_val
*st
, __isl_keep isl_multi_val
*space_sizes
)
823 isl_multi_aff
*localize
;
825 space
= isl_aff_get_domain_space(shift_space
);
826 local_ts
= isl_space_copy(local_ts
);
827 space
= isl_space_map_from_domain_and_range(space
, local_ts
);
828 localize
= isl_multi_aff_identity(space
);
829 t
= isl_multi_aff_get_aff(localize
, 0);
830 v
= isl_val_mul_ui(isl_val_copy(st
), 2);
831 t
= isl_aff_mod_val(t
, v
);
832 localize
= isl_multi_aff_set_aff(localize
, 0, t
);
833 s
= isl_aff_copy(shift_space
);
834 v
= isl_multi_val_get_val(space_sizes
, 0);
835 s
= isl_aff_mod_val(s
, v
);
836 localize
= isl_multi_aff_set_aff(localize
, 1, s
);
841 /* Set the project_ts field of "tiling".
843 * This field projects the space of the input schedule to the ts-space.
844 * It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0].
846 static __isl_give ppcg_ht_tiling
*ppcg_ht_tiling_set_project_ts(
847 __isl_take ppcg_ht_tiling
*tiling
)
851 isl_multi_aff
*project
;
856 space
= ppcg_ht_tiling_get_input_space(tiling
);
857 n
= isl_space_dim(space
, isl_dim_set
);
858 project
= isl_multi_aff_project_out_map(space
, isl_dim_set
, 2, n
- 2);
859 project
= isl_multi_aff_set_tuple_name(project
,
860 isl_dim_out
, ts_space_name
);
862 return ppcg_ht_tiling_free(tiling
);
864 tiling
->project_ts
= project
;
869 /* Construct a hybrid tiling description from bounds on the dependence
870 * distances "bounds".
871 * "input_node" points to the original parent node.
872 * "input_schedule" is the combined schedule of the parent and child
874 * "tile_sizes" are the original, user specified tile sizes.
876 static __isl_give ppcg_ht_tiling
*ppcg_ht_bounds_construct_tiling(
877 __isl_take ppcg_ht_bounds
*bounds
,
878 __isl_keep isl_schedule_node
*input_node
,
879 __isl_keep isl_multi_union_pw_aff
*input_schedule
,
880 __isl_keep isl_multi_val
*tile_sizes
)
883 ppcg_ht_tiling
*tiling
;
884 isl_multi_val
*space_sizes
, *phase_shift
;
885 isl_aff
*time_tile
, *shift_space
;
886 isl_multi_aff
*localize
;
887 isl_val
*h
, *duh
, *dlh
;
888 isl_val
*st
, *s0
, *du
, *dl
;
889 isl_space
*ts
, *local_ts
;
891 if (!bounds
|| !input_node
|| !input_schedule
|| !tile_sizes
)
894 ctx
= isl_multi_union_pw_aff_get_ctx(input_schedule
);
895 tiling
= isl_calloc_type(ctx
, struct ppcg_ht_tiling
);
900 st
= isl_multi_val_get_val(tile_sizes
, 0);
901 h
= isl_val_sub_ui(isl_val_copy(st
), 1);
902 s0
= isl_multi_val_get_val(tile_sizes
, 1);
903 du
= ppcg_ht_bounds_get_upper(bounds
);
904 dl
= ppcg_ht_bounds_get_lower(bounds
, 0);
906 duh
= isl_val_floor(isl_val_mul(isl_val_copy(du
), isl_val_copy(h
)));
907 dlh
= isl_val_floor(isl_val_mul(isl_val_copy(dl
), isl_val_copy(h
)));
909 ts
= construct_ts_space(ctx
);
910 local_ts
= construct_local_ts_space(ctx
);
912 space_sizes
= compute_space_sizes(tile_sizes
, dlh
, duh
);
913 phase_shift
= compute_phase_shift(ts
, st
, s0
, duh
);
914 time_tile
= compute_time_tile(ts
, st
);
915 shift_space
= compute_shift_space(time_tile
, space_sizes
, phase_shift
);
916 localize
= compute_localize(local_ts
, shift_space
, st
, space_sizes
);
919 tiling
->input_node
= isl_schedule_node_copy(input_node
);
920 tiling
->input_schedule
= isl_multi_union_pw_aff_copy(input_schedule
);
921 tiling
->space_sizes
= space_sizes
;
922 tiling
->bounds
= bounds
;
923 tiling
->local_time
= isl_multi_aff_get_aff(localize
, 0);
924 tiling
->hex
= compute_hexagon(local_ts
, h
, s0
, dl
, du
, dlh
, duh
);
925 tiling
->hex
= isl_set_preimage_multi_aff(tiling
->hex
, localize
);
926 tiling
->time_tile
= time_tile
;
927 tiling
->shift_space
= shift_space
;
928 tiling
->shift_phase
= compute_shift_phase(phase_shift
);
929 isl_multi_val_free(phase_shift
);
939 if (!tiling
->input_schedule
|| !tiling
->local_time
|| !tiling
->hex
||
940 !tiling
->shift_space
|| !tiling
->shift_phase
)
941 return ppcg_ht_tiling_free(tiling
);
943 tiling
= ppcg_ht_tiling_set_project_ts(tiling
);
947 ppcg_ht_bounds_free(bounds
);
951 /* Are all members of the band node "node" coincident?
953 static isl_bool
all_coincident(__isl_keep isl_schedule_node
*node
)
957 n
= isl_schedule_node_band_n_member(node
);
958 for (i
= 0; i
< n
; ++i
) {
961 c
= isl_schedule_node_band_member_get_coincident(node
, i
);
966 return isl_bool_true
;
969 /* Does "node" satisfy the properties of the inner node in the input
970 * pattern for hybrid tiling?
971 * That is, is it a band node with only coincident members, of which
972 * there is at least one?
974 static isl_bool
has_child_properties(__isl_keep isl_schedule_node
*node
)
977 return isl_bool_error
;
978 if (isl_schedule_node_get_type(node
) != isl_schedule_node_band
)
979 return isl_bool_false
;
980 if (isl_schedule_node_band_n_member(node
) < 1)
981 return isl_bool_false
;
982 return all_coincident(node
);
985 /* Does "node" satisfy the properties of the outer node in the input
986 * pattern for hybrid tiling?
987 * That is, is it a band node with a single member?
989 static isl_bool
has_parent_properties(__isl_keep isl_schedule_node
*node
)
992 return isl_bool_error
;
993 if (isl_schedule_node_get_type(node
) != isl_schedule_node_band
)
994 return isl_bool_false
;
995 if (isl_schedule_node_band_n_member(node
) != 1)
996 return isl_bool_false
;
997 return isl_bool_true
;
1000 /* Does the parent of "node" satisfy the input patttern for hybrid tiling?
1001 * That is, does "node" satisfy the properties of the inner node and
1002 * does the parent of "node" satisfy the properties of the outer node?
1004 isl_bool
ppcg_ht_parent_has_input_pattern(__isl_keep isl_schedule_node
*node
)
1006 isl_bool has_pattern
;
1008 has_pattern
= has_child_properties(node
);
1009 if (has_pattern
< 0 || !has_pattern
)
1012 node
= isl_schedule_node_copy(node
);
1013 node
= isl_schedule_node_parent(node
);
1014 has_pattern
= has_parent_properties(node
);
1015 isl_schedule_node_free(node
);
1020 /* Does "node" satisfy the input patttern for hybrid tiling?
1021 * That is, does "node" satisfy the properties of the outer node and
1022 * does the child of "node" satisfy the properties of the inner node?
1024 isl_bool
ppcg_ht_has_input_pattern(__isl_keep isl_schedule_node
*node
)
1026 isl_bool has_pattern
;
1028 has_pattern
= has_parent_properties(node
);
1029 if (has_pattern
< 0 || !has_pattern
)
1032 node
= isl_schedule_node_get_child(node
, 0);
1033 has_pattern
= has_child_properties(node
);
1034 isl_schedule_node_free(node
);
1039 /* Check that "node" satisfies the input pattern for hybrid tiling.
1040 * Error out if it does not.
1042 static isl_stat
check_input_pattern(__isl_keep isl_schedule_node
*node
)
1044 isl_bool has_pattern
;
1046 has_pattern
= ppcg_ht_has_input_pattern(node
);
1047 if (has_pattern
< 0)
1048 return isl_stat_error
;
1050 isl_die(isl_schedule_node_get_ctx(node
), isl_error_invalid
,
1051 "invalid input pattern for hybrid tiling",
1052 return isl_stat_error
);
1057 /* Extract the input schedule from "node", i.e., the product
1058 * of the partial schedules of the parent and child nodes
1059 * in the input pattern.
1061 static __isl_give isl_multi_union_pw_aff
*extract_input_schedule(
1062 __isl_keep isl_schedule_node
*node
)
1064 isl_multi_union_pw_aff
*partial
, *partial2
;
1066 partial
= isl_schedule_node_band_get_partial_schedule(node
);
1067 node
= isl_schedule_node_get_child(node
, 0);
1068 partial2
= isl_schedule_node_band_get_partial_schedule(node
);
1069 isl_schedule_node_free(node
);
1071 return isl_multi_union_pw_aff_range_product(partial
, partial2
);
1074 /* Collect all dependences from "scop" that are relevant for performing
1075 * hybrid tiling on "node" and its child and map them to the schedule
1076 * space of this pair of nodes.
1078 * In case live range reordering is not used,
1079 * the flow and the false dependences are collected.
1080 * In case live range reordering is used,
1081 * the flow and the forced dependences are collected, as well
1082 * as the order dependences that are adjacent to non-local
1085 * In all cases, only dependences that map to the same instance
1086 * of the outer part of the schedule are considered.
1088 static __isl_give isl_map
*collect_deps(struct ppcg_scop
*scop
,
1089 __isl_keep isl_schedule_node
*node
)
1092 isl_multi_union_pw_aff
*prefix
, *partial
;
1093 isl_union_map
*flow
, *other
, *dep
, *umap
;
1096 prefix
= isl_schedule_node_get_prefix_schedule_multi_union_pw_aff(node
);
1097 partial
= extract_input_schedule(node
);
1098 space
= isl_multi_union_pw_aff_get_space(partial
);
1100 flow
= isl_union_map_copy(scop
->dep_flow
);
1101 flow
= isl_union_map_eq_at_multi_union_pw_aff(flow
,
1102 isl_multi_union_pw_aff_copy(prefix
));
1103 if (!scop
->options
->live_range_reordering
) {
1104 other
= isl_union_map_copy(scop
->dep_false
);
1105 other
= isl_union_map_eq_at_multi_union_pw_aff(other
, prefix
);
1107 isl_union_map
*local
, *non_local
, *order
, *adj
;
1108 isl_union_set
*domain
, *range
;
1110 other
= isl_union_map_copy(scop
->dep_forced
);
1111 other
= isl_union_map_eq_at_multi_union_pw_aff(other
,
1112 isl_multi_union_pw_aff_copy(prefix
));
1113 local
= isl_union_map_copy(flow
);
1114 local
= isl_union_map_eq_at_multi_union_pw_aff(local
,
1115 isl_multi_union_pw_aff_copy(partial
));
1116 non_local
= isl_union_map_copy(flow
);
1117 non_local
= isl_union_map_subtract(non_local
, local
);
1119 order
= isl_union_map_copy(scop
->dep_order
);
1120 order
= isl_union_map_eq_at_multi_union_pw_aff(order
, prefix
);
1121 adj
= isl_union_map_copy(order
);
1122 domain
= isl_union_map_domain(isl_union_map_copy(non_local
));
1123 domain
= isl_union_set_coalesce(domain
);
1124 adj
= isl_union_map_intersect_range(adj
, domain
);
1125 other
= isl_union_map_union(other
, adj
);
1128 range
= isl_union_map_range(non_local
);
1129 range
= isl_union_set_coalesce(range
);
1130 adj
= isl_union_map_intersect_domain(adj
, range
);
1131 other
= isl_union_map_union(other
, adj
);
1133 dep
= isl_union_map_union(flow
, other
);
1135 umap
= isl_union_map_from_multi_union_pw_aff(partial
);
1136 dep
= isl_union_map_apply_domain(dep
, isl_union_map_copy(umap
));
1137 dep
= isl_union_map_apply_range(dep
, umap
);
1139 space
= isl_space_map_from_set(space
);
1140 map
= isl_union_map_extract_map(dep
, space
);
1141 isl_union_map_free(dep
);
1143 map
= isl_map_coalesce(map
);
1148 /* Given a constraint of the form
1150 * a i_0 + b i_1 >= 0
1154 * use it to update one or both of the non-negative bounds
1155 * in "list" = (min, max) such that
1161 * If b = 0, then the constraint cannot be used.
1162 * Otherwise, the constraint is equivalent to
1164 * sgn(b) i_1 >= - a/abs(b) i_0
1166 * i_1 >= - a/abs(b) i_0
1168 * i_1 <= a/abs(b) i_0
1170 * Set the first or second element of "list" to max(0, a/abs(b)),
1171 * according to the sign of "b". Or set both in case the constraint
1172 * is an equality, taking into account the sign change.
1174 static __isl_give isl_val_list
*list_set_min_max(__isl_take isl_val_list
*list
,
1175 __isl_keep isl_constraint
*c
)
1180 isl_bool eq
, is_zero
, is_neg
;
1182 eq
= isl_constraint_is_equality(c
);
1184 return isl_val_list_free(list
);
1186 b
= isl_constraint_get_coefficient_val(c
, isl_dim_set
, 1);
1187 is_zero
= isl_val_is_zero(b
);
1188 if (is_zero
== isl_bool_true
) {
1192 a
= isl_constraint_get_coefficient_val(c
, isl_dim_set
, 0);
1193 sign
= isl_val_sgn(b
);
1195 a
= isl_val_div(a
, b
);
1198 b
= isl_val_copy(a
);
1200 pos
= sign
> 0 ? 0 : 1;
1201 is_neg
= isl_val_is_neg(a
);
1202 if (is_neg
== isl_bool_true
)
1203 a
= isl_val_set_si(a
, 0);
1204 list
= isl_val_list_set_val(list
, pos
, a
);
1207 return is_neg
< 0 ? isl_val_list_free(list
) : list
;
1211 is_neg
= isl_val_is_neg(a
);
1212 if (is_neg
== isl_bool_true
)
1213 a
= isl_val_set_si(a
, 0);
1214 list
= isl_val_list_set_val(list
, pos
, a
);
1216 return is_neg
< 0 ? isl_val_list_free(list
) : list
;
1219 /* If constraint "c" passes through the origin, then try and use it
1220 * to update the non-negative bounds in "list" = (min, max) such that
1226 static isl_stat
set_min_max(__isl_take isl_constraint
*c
, void *user
)
1229 isl_val_list
**list
= user
;
1232 v
= isl_constraint_get_constant_val(c
);
1233 is_zero
= isl_val_is_zero(v
);
1236 if (is_zero
== isl_bool_true
)
1237 *list
= list_set_min_max(*list
, c
);
1239 isl_constraint_free(c
);
1240 return is_zero
< 0 ? isl_stat_error
: isl_stat_ok
;
1243 /* Given a set of dependence distance vectors "dist", compute
1244 * pair of non-negative bounds min and max such that
1250 * and return the pair (min, max).
1251 * If no bound can be found in either direction, then the bound
1252 * is replaced by NaN.
1254 * The dependence distances are first projected onto the (d_0, d_pos).
1255 * Then the zero dependence distance is added and the convex hull is computed.
1256 * Finally, the bounds are extracted from the constraints of the convex hull
1257 * that pass through the origin.
1259 static __isl_give isl_val_list
*min_max_dist(__isl_keep isl_set
*dist
, int pos
)
1262 isl_basic_set
*hull
;
1268 ctx
= isl_set_get_ctx(dist
);
1269 nan
= isl_val_nan(ctx
);
1270 list
= isl_val_list_alloc(ctx
, 2);
1271 list
= isl_val_list_add(list
, isl_val_copy(nan
));
1272 list
= isl_val_list_add(list
, nan
);
1274 dist
= isl_set_copy(dist
);
1275 dim
= isl_set_dim(dist
, isl_dim_set
);
1276 if (dist
&& pos
>= dim
)
1277 isl_die(ctx
, isl_error_internal
, "position out of bounds",
1278 dist
= isl_set_free(dist
));
1279 dist
= isl_set_project_out(dist
, isl_dim_set
, pos
+ 1, dim
- (pos
+ 1));
1280 dist
= isl_set_project_out(dist
, isl_dim_set
, 1, pos
- 1);
1282 space
= isl_set_get_space(dist
);
1283 dist
= isl_set_union(dist
, isl_set_from_point(isl_point_zero(space
)));
1284 dist
= isl_set_remove_divs(dist
);
1285 hull
= isl_set_convex_hull(dist
);
1287 if (isl_basic_set_foreach_constraint(hull
, &set_min_max
, &list
) < 0)
1288 list
= isl_val_list_free(list
);
1289 isl_basic_set_free(hull
);
1294 /* Given a schedule node "node" that, together with its child,
1295 * satisfies the input pattern for hybrid tiling, compute bounds
1296 * on the relative dependence distances of the child node with
1297 * respect to the parent node. These bounds are needed to
1298 * construct a hybrid tiling.
1300 * First all relevant dependences are collected and mapped
1301 * to the schedule space of the pair of nodes. Then, the
1302 * dependence distances are computed in this space.
1304 * These dependence distances are then projected onto a two-dimensional
1305 * space consisting of the single schedule dimension of the outer node
1306 * and one of the schedule dimensions of the inner node.
1307 * The maximal and minimal relative dependence distances are extracted
1308 * from these projections.
1309 * This process is repeated for each of the schedule dimensions
1310 * of the inner node. For the first dimension, both minimal and
1311 * maximal relative dependence distances are stored in the result.
1312 * For the other dimensions, only the minimal relative dependence
1313 * distance is stored.
1315 __isl_give ppcg_ht_bounds
*ppcg_ht_compute_bounds(struct ppcg_scop
*scop
,
1316 __isl_keep isl_schedule_node
*node
)
1318 ppcg_ht_bounds
*bnd
;
1323 isl_schedule_node
*child
;
1327 if (!scop
|| !node
|| check_input_pattern(node
) < 0)
1330 child
= isl_schedule_node_get_child(node
, 0);
1331 space
= isl_schedule_node_band_get_space(child
);
1332 dim
= isl_schedule_node_band_n_member(child
);
1333 isl_schedule_node_free(child
);
1334 bnd
= ppcg_ht_bounds_alloc(space
);
1338 map
= collect_deps(scop
, node
);
1340 dist
= isl_map_deltas(map
);
1341 n
= isl_set_dim(dist
, isl_dim_param
);
1342 dist
= isl_set_project_out(dist
, isl_dim_param
, 0, n
);
1344 pair
= min_max_dist(dist
, 1);
1345 bnd
= ppcg_ht_bounds_set_lower(bnd
, 0, isl_val_list_get_val(pair
, 0));
1346 bnd
= ppcg_ht_bounds_set_upper(bnd
, isl_val_list_get_val(pair
, 1));
1347 isl_val_list_free(pair
);
1349 for (i
= 1; i
< dim
; ++i
) {
1350 pair
= min_max_dist(dist
, 1 + i
);
1351 bnd
= ppcg_ht_bounds_set_lower(bnd
, i
,
1352 isl_val_list_get_val(pair
, 0));
1353 isl_val_list_free(pair
);
1361 /* Check if all the fields of "phase" are valid, freeing "phase"
1364 static __isl_give ppcg_ht_phase
*check_phase(__isl_take ppcg_ht_phase
*phase
)
1369 if (!phase
->tiling
|| !phase
->local_time
||
1370 !phase
->shift_space
|| !phase
->domain
)
1371 return ppcg_ht_phase_free(phase
);
1376 /* Construct a ppcg_ht_phase object, that simply copies
1377 * information from "tiling".
1378 * That is, the result is defined over the "ts" space and
1379 * corresponds to phase 1.
1381 static __isl_give ppcg_ht_phase
*construct_phase(
1382 __isl_keep ppcg_ht_tiling
*tiling
)
1385 ppcg_ht_phase
*phase
;
1390 ctx
= ppcg_ht_tiling_get_ctx(tiling
);
1391 phase
= isl_calloc_type(ctx
, struct ppcg_ht_phase
);
1394 phase
->tiling
= ppcg_ht_tiling_copy(tiling
);
1395 phase
->time_tile
= isl_aff_copy(tiling
->time_tile
);
1396 phase
->local_time
= isl_aff_copy(tiling
->local_time
);
1397 phase
->shift_space
= isl_aff_copy(tiling
->shift_space
);
1398 phase
->domain
= isl_set_copy(tiling
->hex
);
1400 return check_phase(phase
);
1403 /* Align the parameters of the elements of "phase" to those of "space".
1405 static __isl_give ppcg_ht_phase
*phase_align_params(
1406 __isl_take ppcg_ht_phase
*phase
, __isl_take isl_space
*space
)
1411 phase
->time_tile
= isl_aff_align_params(phase
->time_tile
,
1412 isl_space_copy(space
));
1413 phase
->local_time
= isl_aff_align_params(phase
->local_time
,
1414 isl_space_copy(space
));
1415 phase
->shift_space
= isl_aff_align_params(phase
->shift_space
,
1416 isl_space_copy(space
));
1417 phase
->domain
= isl_set_align_params(phase
->domain
, space
);
1419 return check_phase(phase
);
1421 isl_space_free(space
);
1425 /* Pull back "phase" over "ma".
1426 * That is, take a phase defined over the range of "ma" and
1427 * turn it into a phase defined over the domain of "ma".
1429 static __isl_give ppcg_ht_phase
*pullback_phase(__isl_take ppcg_ht_phase
*phase
,
1430 __isl_take isl_multi_aff
*ma
)
1432 phase
= phase_align_params(phase
, isl_multi_aff_get_space(ma
));
1436 phase
->time_tile
= isl_aff_pullback_multi_aff(phase
->time_tile
,
1437 isl_multi_aff_copy(ma
));
1438 phase
->local_time
= isl_aff_pullback_multi_aff(phase
->local_time
,
1439 isl_multi_aff_copy(ma
));
1440 phase
->shift_space
= isl_aff_pullback_multi_aff(phase
->shift_space
,
1441 isl_multi_aff_copy(ma
));
1442 phase
->domain
= isl_set_preimage_multi_aff(phase
->domain
, ma
);
1444 return check_phase(phase
);
1446 isl_multi_aff_free(ma
);
1450 /* Pullback "phase" over phase->tiling->shift_phase, which shifts
1451 * phase 0 to phase 1. The pullback therefore takes a phase 1
1452 * description and turns it into a phase 0 description.
1454 static __isl_give ppcg_ht_phase
*shift_phase(__isl_take ppcg_ht_phase
*phase
)
1456 ppcg_ht_tiling
*tiling
;
1461 tiling
= phase
->tiling
;
1462 return pullback_phase(phase
, isl_multi_aff_copy(tiling
->shift_phase
));
1465 /* Take a "phase" defined over the ts-space and plug in the projection
1466 * from the input schedule space to the ts-space.
1467 * The result is then defined over this input schedule space.
1469 static __isl_give ppcg_ht_phase
*lift_phase(__isl_take ppcg_ht_phase
*phase
)
1471 ppcg_ht_tiling
*tiling
;
1476 tiling
= phase
->tiling
;
1477 return pullback_phase(phase
, isl_multi_aff_copy(tiling
->project_ts
));
1480 /* Compute the shift that should be added to the space band
1481 * in order to be able to apply rectangular tiling to the space.
1482 * Store the shift in phase->space_shift.
1484 * In the first dimension, it is equal to shift_space - s.
1485 * For phase 1, this results in
1487 * (-(2 * shift_s)*T) % W
1489 * In phase 0, the "s" in shift_space has been replaced by "s + shift_s",
1492 * shift_s + (-(2 * shift_s)*T) % W
1494 * In the other dimensions, the shift is equal to
1496 * dl_i * local_time.
1498 static __isl_give ppcg_ht_phase
*compute_space_shift(
1499 __isl_take ppcg_ht_phase
*phase
)
1503 isl_local_space
*ls
;
1505 isl_multi_aff
*space_shift
;
1510 space
= ppcg_ht_phase_get_input_space(phase
);
1511 space
= isl_space_unwrap(space
);
1512 space
= isl_space_range_map(space
);
1514 space_shift
= isl_multi_aff_zero(space
);
1515 aff
= isl_aff_copy(phase
->shift_space
);
1516 ls
= isl_local_space_from_space(isl_aff_get_domain_space(aff
));
1517 s
= isl_aff_var_on_domain(ls
, isl_dim_set
, 1);
1518 aff
= isl_aff_sub(aff
, s
);
1519 space_shift
= isl_multi_aff_set_aff(space_shift
, 0, aff
);
1521 n
= isl_multi_aff_dim(space_shift
, isl_dim_out
);
1522 for (i
= 1; i
< n
; ++i
) {
1526 v
= ppcg_ht_bounds_get_lower(phase
->tiling
->bounds
, i
);
1527 time
= isl_aff_copy(phase
->local_time
);
1528 time
= isl_aff_scale_val(time
, v
);
1529 space_shift
= isl_multi_aff_set_aff(space_shift
, i
, time
);
1533 return ppcg_ht_phase_free(phase
);
1534 phase
->space_shift
= space_shift
;
1538 /* Compute the space tiling and store the result in phase->space_tile.
1539 * The space tiling is of the form
1541 * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size]
1543 static __isl_give ppcg_ht_phase
*compute_space_tile(
1544 __isl_take ppcg_ht_phase
*phase
)
1547 isl_multi_val
*space_sizes
;
1548 isl_multi_aff
*space_shift
;
1549 isl_multi_aff
*tile
;
1554 space
= ppcg_ht_phase_get_input_space(phase
);
1555 space
= isl_space_unwrap(space
);
1556 tile
= isl_multi_aff_range_map(space
);
1557 space_shift
= isl_multi_aff_copy(phase
->space_shift
);
1558 tile
= isl_multi_aff_add(space_shift
, tile
);
1559 space_sizes
= isl_multi_val_copy(phase
->tiling
->space_sizes
);
1560 tile
= isl_multi_aff_scale_down_multi_val(tile
, space_sizes
);
1561 tile
= isl_multi_aff_floor(tile
);
1564 return ppcg_ht_phase_free(phase
);
1565 phase
->space_tile
= tile
;
1569 /* Construct a representation for one of the two phase for hybrid tiling
1570 * "tiling". If "shift" is not set, then the phase is constructed
1571 * directly from the hexagonal tile shape in "tiling", which represents
1572 * the phase-1 tiles. If "shift" is set, then this tile shape is shifted
1573 * back over tiling->shift_phase to obtain the phase-0 tiles.
1575 * First copy data from "tiling", then optionally shift the phase and
1576 * finally move the tiling from the "ts" space of "tiling" to
1577 * the space of the input pattern.
1579 * After the basic phase has been computed, also compute
1580 * the corresponding space shift.
1582 static __isl_give ppcg_ht_phase
*ppcg_ht_tiling_compute_phase(
1583 __isl_keep ppcg_ht_tiling
*tiling
, int shift
)
1585 ppcg_ht_phase
*phase
;
1587 phase
= construct_phase(tiling
);
1589 phase
= shift_phase(phase
);
1590 phase
= lift_phase(phase
);
1592 phase
= compute_space_shift(phase
);
1593 phase
= compute_space_tile(phase
);
1598 /* Consruct a function that is equal to the time tile of "phase0"
1599 * on the domain of "phase0" and equal to the time tile of "phase1"
1600 * on the domain of "phase1".
1601 * The two domains are assumed to form a partition of the input
1604 static __isl_give isl_pw_multi_aff
*combine_time_tile(
1605 __isl_keep ppcg_ht_phase
*phase0
, __isl_keep ppcg_ht_phase
*phase1
)
1608 isl_pw_aff
*time
, *time1
;
1610 if (!phase0
|| !phase1
)
1613 T
= isl_aff_copy(phase0
->time_tile
);
1614 time
= isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase0
), T
);
1616 T
= isl_aff_copy(phase1
->time_tile
);
1617 time1
= isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase1
), T
);
1619 time
= isl_pw_aff_union_add(time
, time1
);
1621 return isl_pw_multi_aff_from_pw_aff(time
);
1624 /* Name used in mark nodes that contain a pointer to a ppcg_ht_phase.
1626 static char *ppcg_phase_name
= "phase";
1628 /* Does "id" contain a pointer to a ppcg_ht_phase?
1629 * That is, is it called "phase"?
1631 static isl_bool
is_phase_id(__isl_keep isl_id
*id
)
1635 name
= isl_id_get_name(id
);
1637 return isl_bool_error
;
1639 return !strcmp(name
, ppcg_phase_name
);
1642 /* Given a mark node with an identifier that points to a ppcg_ht_phase,
1643 * extract this ppcg_ht_phase pointer.
1645 __isl_keep ppcg_ht_phase
*ppcg_ht_phase_extract_from_mark(
1646 __isl_keep isl_schedule_node
*node
)
1654 if (isl_schedule_node_get_type(node
) != isl_schedule_node_mark
)
1655 isl_die(isl_schedule_node_get_ctx(node
), isl_error_internal
,
1656 "not a phase mark", return NULL
);
1658 id
= isl_schedule_node_mark_get_id(node
);
1659 is_phase
= is_phase_id(id
);
1660 p
= isl_id_get_user(id
);
1666 isl_die(isl_schedule_node_get_ctx(node
), isl_error_internal
,
1667 "not a phase mark", return NULL
);
1672 /* Insert a mark node at "node" holding a pointer to "phase".
1674 static __isl_give isl_schedule_node
*insert_phase(
1675 __isl_take isl_schedule_node
*node
, __isl_take ppcg_ht_phase
*phase
)
1682 ctx
= isl_schedule_node_get_ctx(node
);
1683 id
= isl_id_alloc(ctx
, ppcg_phase_name
, phase
);
1686 id
= isl_id_set_free_user(id
, &ppcg_ht_phase_free_wrap
);
1687 node
= isl_schedule_node_insert_mark(node
, id
);
1691 ppcg_ht_phase_free(phase
);
1692 isl_schedule_node_free(node
);
1696 /* Construct a mapping from the elements of the original pair of bands
1697 * to which tiling was applied that belong to a tile of "phase"
1698 * to that tile, preserving the values for the outer bands.
1700 * The mapping is of the form
1702 * [[outer] -> [P -> C]] -> [[outer] -> [tile]]
1704 * where tile is defined by a concatenation of the time_tile and
1707 static __isl_give isl_map
*construct_tile_map(__isl_keep ppcg_ht_phase
*phase
)
1712 isl_multi_aff
*tiling
;
1715 depth
= isl_schedule_node_get_schedule_depth(
1716 phase
->tiling
->input_node
);
1717 space
= isl_aff_get_space(phase
->time_tile
);
1718 space
= isl_space_params(space
);
1719 space
= isl_space_set_from_params(space
);
1720 space
= isl_space_add_dims(space
, isl_dim_set
, depth
);
1721 space
= isl_space_map_from_set(space
);
1722 ma
= isl_multi_aff_identity(space
);
1724 tiling
= isl_multi_aff_flat_range_product(
1725 isl_multi_aff_from_aff(isl_aff_copy(phase
->time_tile
)),
1726 isl_multi_aff_copy(phase
->space_tile
));
1727 el2tile
= isl_map_from_multi_aff(tiling
);
1728 el2tile
= isl_map_intersect_domain(el2tile
,
1729 isl_set_copy(phase
->domain
));
1730 el2tile
= isl_map_product(isl_map_from_multi_aff(ma
), el2tile
);
1735 /* Return a description of the full tiles of "phase" at the point
1736 * in the original schedule tree where the tiling was applied.
1738 * First construct a mapping from the input schedule dimensions
1739 * up to an including the original pair of bands to which hybrid tiling
1740 * was applied to schedule dimensions in which this original pair
1741 * has been replaced by the tiles.
1742 * This mapping is of the form
1744 * [[outer] -> [P -> C]] -> [[outer] -> [tile]]
1746 * Apply this mapping to the set of all values for the input
1747 * schedule dimensions and then apply its inverse.
1748 * The result is the set of values for the input schedule dimensions
1749 * that would map to any of the tiles. Subtracting from this set
1750 * the set of values that are actually executed produces the set
1751 * of values that belong to a tile but that are not executed.
1752 * Mapping these back to the tiles produces a description of
1753 * the partial tiles. Subtracting these from the set of all tiles
1754 * produces a description of the full tiles in the form
1756 * [[outer] -> [tile]]
1758 static __isl_give isl_set
*compute_full_tile(__isl_keep ppcg_ht_phase
*phase
)
1760 isl_schedule_node
*node
;
1761 isl_union_set
*domain
;
1762 isl_union_map
*prefix
, *schedule
;
1763 isl_set
*all
, *partial
, *all_el
;
1764 isl_map
*tile2el
, *el2tile
;
1765 isl_multi_union_pw_aff
*mupa
;
1767 el2tile
= construct_tile_map(phase
);
1768 tile2el
= isl_map_reverse(isl_map_copy(el2tile
));
1770 node
= phase
->tiling
->input_node
;
1771 prefix
= isl_schedule_node_get_prefix_schedule_union_map(node
);
1772 domain
= isl_schedule_node_get_domain(node
);
1773 mupa
= isl_multi_union_pw_aff_copy(phase
->tiling
->input_schedule
);
1774 schedule
= isl_union_map_from_multi_union_pw_aff(mupa
);
1775 schedule
= isl_union_map_range_product(prefix
, schedule
);
1776 all_el
= isl_set_from_union_set(isl_union_set_apply(domain
, schedule
));
1777 all_el
= isl_set_coalesce(all_el
);
1779 all
= isl_set_apply(isl_set_copy(all_el
), isl_map_copy(el2tile
));
1781 partial
= isl_set_copy(all
);
1782 partial
= isl_set_apply(partial
, tile2el
);
1783 partial
= isl_set_subtract(partial
, all_el
);
1784 partial
= isl_set_apply(partial
, el2tile
);
1786 return isl_set_subtract(all
, partial
);
1789 /* Copy the AST loop types of the non-isolated part to those
1790 * of the isolated part.
1792 static __isl_give isl_schedule_node
*set_isolate_loop_type(
1793 __isl_take isl_schedule_node
*node
)
1797 n
= isl_schedule_node_band_n_member(node
);
1798 for (i
= 0; i
< n
; ++i
) {
1799 enum isl_ast_loop_type type
;
1801 type
= isl_schedule_node_band_member_get_ast_loop_type(node
, i
);
1802 node
= isl_schedule_node_band_member_set_isolate_ast_loop_type(
1809 /* If options->isolate_full_tiles is set, then mark the full tiles
1810 * in "node" for isolation. The full tiles are derived from "phase".
1811 * "node" may point to a part of the tiling, e.g., the space tiling.
1813 * The full tiles are originally computed in the form
1815 * [[outer] -> [tile]]
1817 * However, the band that "node" points to may only contain
1818 * subset of the tile dimensions.
1819 * The description above is therefore treated as
1821 * [[outer] -> [before; this; after]]
1823 * before is of size "pos"; this is of size "dim"; and
1824 * after is of size "out - pos - dim".
1825 * The after part is first project out. Then the range is split
1826 * into a before and this part and finally the before part is moved
1827 * to the domain, resulting in
1829 * [[outer; before] -> [this]]
1831 * This description is then used as the isolate option.
1833 * The AST loop type for the isolated part is set to be the same
1834 * as that of the non-isolated part.
1836 static __isl_give isl_schedule_node
*ppcg_ht_phase_isolate_full_tile_node(
1837 __isl_keep ppcg_ht_phase
*phase
, __isl_take isl_schedule_node
*node
,
1838 struct ppcg_options
*options
)
1840 int in
, out
, pos
, depth
, dim
;
1842 isl_multi_aff
*ma1
, *ma2
;
1848 if (!options
->isolate_full_tiles
)
1851 depth
= isl_schedule_node_get_schedule_depth(node
);
1852 dim
= isl_schedule_node_band_n_member(node
);
1854 tile
= compute_full_tile(phase
);
1855 map
= isl_set_unwrap(tile
);
1856 in
= isl_map_dim(map
, isl_dim_in
);
1857 out
= isl_map_dim(map
, isl_dim_out
);
1859 map
= isl_map_project_out(map
, isl_dim_out
, pos
+ dim
,
1861 space
= isl_space_range(isl_map_get_space(map
));
1862 ma1
= isl_multi_aff_project_out_map(isl_space_copy(space
),
1863 isl_dim_set
, pos
, dim
);
1864 ma2
= isl_multi_aff_project_out_map(space
, isl_dim_set
, 0, pos
);
1865 ma1
= isl_multi_aff_range_product(ma1
, ma2
);
1866 map
= isl_map_apply_range(map
, isl_map_from_multi_aff(ma1
));
1867 map
= isl_map_uncurry(map
);
1868 map
= isl_map_flatten_domain(map
);
1869 set
= isl_map_wrap(map
);
1870 set
= isl_set_set_tuple_name(set
, "isolate");
1872 opt
= isl_schedule_node_band_get_ast_build_options(node
);
1873 opt
= isl_union_set_add_set(opt
, set
);
1874 node
= isl_schedule_node_band_set_ast_build_options(node
, opt
);
1875 node
= set_isolate_loop_type(node
);
1880 /* Insert a band node for performing the space tiling for "phase" at "node".
1881 * In particular, insert a band node with partial schedule
1883 * [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size)]
1885 * pulled back over the input schedule.
1886 * "options" determines whether full tiles should be separated
1887 * from partial tiles.
1889 * The first tile dimension iterates over the hexagons in the same
1890 * phase, which are independent by construction. The first dimension
1891 * is therefore marked coincident.
1892 * All dimensions are also marked for being generated as atomic loops
1893 * because separation is usually not desirable on tile loops.
1895 static __isl_give isl_schedule_node
*insert_space_tiling(
1896 __isl_keep ppcg_ht_phase
*phase
, __isl_take isl_schedule_node
*node
,
1897 struct ppcg_options
*options
)
1899 isl_multi_aff
*space_tile
;
1900 isl_multi_union_pw_aff
*mupa
;
1903 return isl_schedule_node_free(node
);
1905 space_tile
= isl_multi_aff_copy(phase
->space_tile
);
1906 mupa
= isl_multi_union_pw_aff_copy(phase
->tiling
->input_schedule
);
1907 mupa
= isl_multi_union_pw_aff_apply_multi_aff(mupa
, space_tile
);
1908 node
= isl_schedule_node_insert_partial_schedule(node
, mupa
);
1909 node
= ppcg_set_schedule_node_type(node
, isl_ast_loop_atomic
);
1910 node
= ppcg_ht_phase_isolate_full_tile_node(phase
, node
, options
);
1911 node
= isl_schedule_node_band_member_set_coincident(node
, 0, 1);
1916 /* Given a pointer "node" to (a copy of) the original child node
1917 * in the input pattern, adjust its partial schedule such that
1918 * it starts at zero within each tile.
1920 * That is, replace "s" by (s + space_shift) % space_sizes.
1922 __isl_give isl_schedule_node
*ppcg_ht_phase_shift_space_point(
1923 __isl_keep ppcg_ht_phase
*phase
, __isl_take isl_schedule_node
*node
)
1925 isl_multi_val
*space_sizes
;
1926 isl_multi_aff
*space_shift
;
1927 isl_multi_union_pw_aff
*mupa
;
1929 space_shift
= isl_multi_aff_copy(phase
->space_shift
);
1930 mupa
= isl_multi_union_pw_aff_copy(phase
->tiling
->input_schedule
);
1931 mupa
= isl_multi_union_pw_aff_apply_multi_aff(mupa
, space_shift
);
1932 node
= isl_schedule_node_band_shift(node
, mupa
);
1933 space_sizes
= isl_multi_val_copy(phase
->tiling
->space_sizes
);
1934 node
= isl_schedule_node_band_mod(node
, space_sizes
);
1941 * s0 > delta + 2 * {delta * h} - 1
1945 static isl_bool
wide_enough(__isl_keep isl_val
*s0
, __isl_keep isl_val
*delta
,
1946 __isl_keep isl_val
*h
)
1951 v
= isl_val_mul(isl_val_copy(delta
), isl_val_copy(h
));
1952 v2
= isl_val_floor(isl_val_copy(v
));
1953 v
= isl_val_sub(v
, v2
);
1954 v
= isl_val_mul_ui(v
, 2);
1955 v
= isl_val_add(v
, isl_val_copy(delta
));
1956 v
= isl_val_sub_ui(v
, 1);
1957 ok
= isl_val_gt(s0
, v
);
1963 /* Is the tile size specified by "sizes" wide enough in the first space
1964 * dimension, i.e., the base of the hexagon? This ensures that,
1965 * after hybrid tiling using "bounds" and these sizes,
1966 * neighboring hexagons in the same phase are far enough apart
1967 * that they do not depend on each other.
1968 * The test is only meaningful if the bounds are valid.
1970 * Let st be (half) the size in the time dimension and s0 the base
1971 * size in the first space dimension. Let delta be the dependence
1972 * distance in either positive or negative direction. In principle,
1973 * it should be enough to have s0 + 1 > delta, i.e., s0 >= delta.
1974 * However, in case of fractional delta, the tile is not extended
1975 * with delta * (st - 1), but instead with floor(delta * (st - 1)).
1976 * The condition therefore needs to be adjusted to
1978 * s0 + 1 > delta + 2 {delta * (st - 1)}
1980 * (with {} the fractional part) to account for the two slanted sides.
1981 * The condition in the paper "Hybrid Hexagonal/Classical Tiling for GPUs"
1984 * s0 >= delta + {delta * (st - 1)}
1986 * Since 1 > frac(delta * (st - 1)), this condition implies
1987 * the condition above.
1989 * The condition is checked for both directions.
1991 isl_bool
ppcg_ht_bounds_supports_sizes(__isl_keep ppcg_ht_bounds
*bounds
,
1992 __isl_keep isl_multi_val
*sizes
)
1998 ok
= ppcg_ht_bounds_is_valid(bounds
);
2002 h
= isl_val_sub_ui(isl_multi_val_get_val(sizes
, 0), 1);
2003 s0
= isl_multi_val_get_val(sizes
, 1);
2005 delta
= ppcg_ht_bounds_get_lower(bounds
, 0);
2006 ok
= wide_enough(s0
, delta
, h
);
2007 isl_val_free(delta
);
2009 delta
= ppcg_ht_bounds_get_upper(bounds
);
2010 if (ok
== isl_bool_true
)
2011 ok
= wide_enough(s0
, delta
, h
);
2012 isl_val_free(delta
);
2020 /* Check that the tile will be wide enough in the first space
2021 * dimension, i.e., the base of the hexagon. This ensures that
2022 * neighboring hexagons in the same phase are far enough apart
2023 * that they do not depend on each other.
2025 * Error out if the condition fails to hold.
2027 static isl_stat
check_width(__isl_keep ppcg_ht_bounds
*bounds
,
2028 __isl_keep isl_multi_val
*sizes
)
2032 ok
= ppcg_ht_bounds_supports_sizes(bounds
, sizes
);
2035 return isl_stat_error
;
2037 isl_die(isl_multi_val_get_ctx(sizes
), isl_error_invalid
,
2038 "base of hybrid tiling hexagon not sufficiently wide",
2039 return isl_stat_error
);
2044 /* Given valid bounds on the relative dependence distances for
2045 * the pair of nested nodes that "node" point to, as well as sufficiently
2046 * wide tile sizes "sizes", insert the corresponding time and space tiling
2047 * at "node", along with a pair of phase nodes that can be used
2048 * to make further changes.
2049 * The space of "sizes" should be the product of the spaces
2050 * of the schedules of the pair of parent and child nodes.
2051 * "options" determines whether full tiles should be separated
2052 * from partial tiles.
2054 * In particular, given an input of the form
2058 * the output has the form
2060 * /- F0 - M0 - CT0 - P - C - ...
2062 * \- F1 - M1 - CT1 - P - C - ...
2064 * PT is the global time tiling. Within each of these tiles,
2065 * two phases are executed in order. Within each phase, the schedule
2066 * space is further subdivided into tiles through CT0 and CT1.
2067 * The first dimension of each of these iterates over the hexagons
2068 * within a phase and these are independent by construction.
2069 * The F0 and F1 filters filter the statement instances that belong
2070 * to the corresponding phase. The M0 and M1 marks contain a pointer
2071 * to a ppcg_ht_phase object that can be used to perform further changes.
2073 * After checking that input satisfies the requirements,
2074 * a data structure is constructed that represents the tiling and
2075 * two additional data structures are constructed for the two phases
2076 * of the tiling. These are then used to define the filters F0 and F1 and
2077 * combined to construct the time tiling PT.
2078 * Then the time tiling node PT is inserted, followed by
2079 * the sequence with the two filters, the CT space tiling nodes and
2080 * the phase markers M.
2082 __isl_give isl_schedule_node
*ppcg_ht_bounds_insert_tiling(
2083 __isl_take ppcg_ht_bounds
*bounds
, __isl_take isl_multi_val
*sizes
,
2084 __isl_take isl_schedule_node
*node
, struct ppcg_options
*options
)
2087 isl_union_set
*phase0
;
2088 isl_union_set
*phase1
;
2089 isl_multi_union_pw_aff
*input
, *dom_time
;
2090 isl_union_pw_multi_aff
*upma
;
2091 isl_pw_multi_aff
*time
;
2092 isl_union_set_list
*phases
;
2093 ppcg_ht_tiling
*tiling
;
2094 ppcg_ht_phase
*phase_0
;
2095 ppcg_ht_phase
*phase_1
;
2097 if (!node
|| !sizes
|| !bounds
)
2099 if (check_input_pattern(node
) < 0 || check_width(bounds
, sizes
) < 0)
2102 ctx
= isl_schedule_node_get_ctx(node
);
2104 input
= extract_input_schedule(node
);
2106 tiling
= ppcg_ht_bounds_construct_tiling(bounds
, node
, input
, sizes
);
2107 phase_0
= ppcg_ht_tiling_compute_phase(tiling
, 1);
2108 phase_1
= ppcg_ht_tiling_compute_phase(tiling
, 0);
2109 time
= combine_time_tile(phase_0
, phase_1
);
2110 ppcg_ht_tiling_free(tiling
);
2112 upma
= isl_union_pw_multi_aff_from_multi_union_pw_aff(
2113 isl_multi_union_pw_aff_copy(input
));
2114 phase0
= isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_0
));
2115 phase0
= isl_union_set_preimage_union_pw_multi_aff(phase0
,
2116 isl_union_pw_multi_aff_copy(upma
));
2117 phase1
= isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_1
));
2118 phase1
= isl_union_set_preimage_union_pw_multi_aff(phase1
, upma
);
2120 phases
= isl_union_set_list_alloc(ctx
, 2);
2121 phases
= isl_union_set_list_add(phases
, phase0
);
2122 phases
= isl_union_set_list_add(phases
, phase1
);
2124 dom_time
= isl_multi_union_pw_aff_apply_pw_multi_aff(input
, time
);
2125 node
= isl_schedule_node_insert_partial_schedule(node
, dom_time
);
2127 node
= isl_schedule_node_child(node
, 0);
2129 node
= isl_schedule_node_insert_sequence(node
, phases
);
2130 node
= isl_schedule_node_child(node
, 0);
2131 node
= isl_schedule_node_child(node
, 0);
2132 node
= insert_space_tiling(phase_0
, node
, options
);
2133 node
= insert_phase(node
, phase_0
);
2134 node
= isl_schedule_node_parent(node
);
2135 node
= isl_schedule_node_next_sibling(node
);
2136 node
= isl_schedule_node_child(node
, 0);
2137 node
= insert_space_tiling(phase_1
, node
, options
);
2138 node
= insert_phase(node
, phase_1
);
2139 node
= isl_schedule_node_parent(node
);
2140 node
= isl_schedule_node_parent(node
);
2142 node
= isl_schedule_node_parent(node
);
2144 isl_multi_val_free(sizes
);
2147 isl_multi_val_free(sizes
);
2148 isl_schedule_node_free(node
);
2149 ppcg_ht_bounds_free(bounds
);
2153 /* Given a branch "node" that contains a sequence node with two phases
2154 * of hybrid tiling as input, call "fn" on each of the two phase marker
2157 * That is, the input is as follows
2163 * and "fn" is called on M0 and on M1.
2165 __isl_give isl_schedule_node
*hybrid_tile_foreach_phase(
2166 __isl_take isl_schedule_node
*node
,
2167 __isl_give isl_schedule_node
*(*fn
)(__isl_take isl_schedule_node
*node
,
2168 void *user
), void *user
)
2172 depth0
= isl_schedule_node_get_tree_depth(node
);
2175 isl_schedule_node_get_type(node
) != isl_schedule_node_sequence
)
2176 node
= isl_schedule_node_child(node
, 0);
2178 node
= isl_schedule_node_child(node
, 0);
2179 node
= isl_schedule_node_child(node
, 0);
2182 node
= fn(node
, user
);
2183 node
= isl_schedule_node_parent(node
);
2184 node
= isl_schedule_node_next_sibling(node
);
2185 node
= isl_schedule_node_child(node
, 0);
2188 node
= fn(node
, user
);
2189 node
= isl_schedule_node_parent(node
);
2190 node
= isl_schedule_node_parent(node
);
2192 depth
= isl_schedule_node_get_tree_depth(node
);
2193 node
= isl_schedule_node_ancestor(node
, depth
- depth0
);
2198 /* This function is called on each of the two phase marks
2199 * in a hybrid tiling tree.
2200 * Drop the phase mark at "node".
2202 static __isl_give isl_schedule_node
*drop_phase_mark(
2203 __isl_take isl_schedule_node
*node
, void *user
)
2208 if (isl_schedule_node_get_type(node
) != isl_schedule_node_mark
)
2211 id
= isl_schedule_node_mark_get_id(node
);
2212 is_phase
= is_phase_id(id
);
2216 return isl_schedule_node_free(node
);
2218 node
= isl_schedule_node_delete(node
);
2223 /* Given a branch "node" that contains a sequence node with two phases
2224 * of hybrid tiling as input, remove the two phase marker nodes.
2226 * That is, the input is as follows
2238 __isl_give isl_schedule_node
*hybrid_tile_drop_phase_marks(
2239 __isl_take isl_schedule_node
*node
)
2241 return hybrid_tile_foreach_phase(node
, &drop_phase_mark
, NULL
);