2 * Copyright 2013 Ecole Normale Superieure
3 * Copyright 2015 Sven Verdoolaege
5 * Use of this software is governed by the MIT license
7 * Written by Sven Verdoolaege,
8 * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14 #include <isl/space.h>
15 #include <isl/union_set.h>
16 #include <isl/schedule_node.h>
19 #include "gpu_hybrid.h"
24 /* Have all domain elements been filtered out before reaching
25 * the "node" position in the schedule tree?
27 static isl_bool
has_empty_domain(__isl_keep isl_schedule_node
*node
)
29 isl_union_set
*domain
;
32 domain
= isl_schedule_node_get_domain(node
);
33 empty
= isl_union_set_is_empty(domain
);
34 isl_union_set_free(domain
);
39 /* Given a pointer to a phase in the result of hybrid tiling,
40 * map the phase to the device, provided the phase is non-empty.
41 * Empty phases can occur if the input schedule domain can be
42 * covered by a small number of hexagons that all belong to the same phase.
44 * The input has the following form:
46 * M - CT - P - C - ...
48 * with M the phase marker, CT the space tiling, P the original
49 * parent band and C the original child band.
50 * The (outer dimensions of the) C band need to be mapped to threads.
51 * The (outer dimension of the) CT band needs to be mapped to blocks.
52 * The mapping to shared memory needs to be computed between the CT and
55 * The C band is first shifted to start at zero.
56 * Then the appropriate markers are introduced and a kernel is
57 * created for the tree rooted at CT.
58 * If the "unroll_gpu_tile" option is set, then the AST generator
59 * is instructed to unroll the P and C bands.
61 static __isl_give isl_schedule_node
*update_phase(
62 __isl_take isl_schedule_node
*node
, void *user
)
64 struct gpu_gen
*gen
= user
;
68 isl_bool empty_domain
;
71 empty_domain
= has_empty_domain(node
);
73 return isl_schedule_node_free(node
);
79 ctx
= isl_schedule_node_get_ctx(node
);
81 phase
= ppcg_ht_phase_extract_from_mark(node
);
83 depth0
= isl_schedule_node_get_tree_depth(node
);
85 node
= isl_schedule_node_child(node
, 0);
87 node
= isl_schedule_node_child(node
, 0);
88 node
= isl_schedule_node_child(node
, 0);
89 node
= ppcg_ht_phase_shift_space_point(phase
, node
);
90 if (gen
->options
->unroll_gpu_tile
)
91 node
= ppcg_set_schedule_node_type(node
, isl_ast_loop_unroll
);
92 id
= isl_id_alloc(ctx
, "thread", NULL
);
93 node
= isl_schedule_node_insert_mark(node
, id
);
94 node
= isl_schedule_node_parent(node
);
95 if (gen
->options
->unroll_gpu_tile
)
96 node
= ppcg_set_schedule_node_type(node
, isl_ast_loop_unroll
);
97 id
= isl_id_alloc(ctx
, "shared", NULL
);
98 node
= isl_schedule_node_insert_mark(node
, id
);
99 node
= isl_schedule_node_parent(node
);
101 node
= gpu_create_kernel(gen
, node
, 0, NULL
);
103 depth
= isl_schedule_node_get_tree_depth(node
);
104 node
= isl_schedule_node_ancestor(node
, depth
- depth0
);
109 /* Apply hybrid tiling on "node" and its parent based on the (valid)
110 * bounds on the relative dependence distances "bounds" and
111 * the tile sizes in "tile_sizes".
112 * The number of elements in "tile_sizes" is at least as large
113 * as the sum of the dimensions of the parent and the child node.
115 * Convert the tile_sizes to an isl_multi_val in the right space,
116 * insert the hybrid tiling and then create a kernel inside each phase.
117 * Finally, remove the phase marks.
119 __isl_give isl_schedule_node
*gpu_hybrid_tile(struct gpu_gen
*gen
,
120 __isl_take isl_schedule_node
*node
, __isl_take ppcg_ht_bounds
*bounds
,
124 isl_space
*space
, *space2
;
126 if (!node
|| !bounds
)
129 space2
= isl_schedule_node_band_get_space(node
);
130 node
= isl_schedule_node_parent(node
);
131 space
= isl_schedule_node_band_get_space(node
);
132 space
= isl_space_product(space
, space2
);
133 mv
= ppcg_multi_val_from_int_list(space
, tile_sizes
);
135 node
= ppcg_ht_bounds_insert_tiling(bounds
, mv
, node
, gen
->options
);
137 node
= hybrid_tile_foreach_phase(node
, &update_phase
, gen
);
139 node
= hybrid_tile_drop_phase_marks(node
);
143 isl_schedule_node_free(node
);
144 ppcg_ht_bounds_free(bounds
);