update pet for more relaxed pet_expr_is_equal index comparison
[ppn.git] / mem_bound.cc
blobf2d6148b602d12cc7ab736c25ed14c8a9933657d
1 #include <vector>
2 #include <iostream>
3 #include <isl/space.h>
4 #include <isl/set.h>
5 #include <isl/map.h>
6 #include <isl/constraint.h>
7 #include <isl/polynomial.h>
8 #include <isl/printer.h>
9 #include <barvinok/isl.h>
10 #include <isa/yaml.h>
11 #include <isa/pdg.h>
12 #include "da.h"
13 #include "translation.h"
14 #include "mem_options.h"
16 using std::vector;
17 using pdg::PDG;
18 using namespace da;
19 using std::cout;
20 using std::cerr;
21 using std::endl;
23 /* Compute the number of array elements that are read but not
24 * (previously) written. These are assumed to be the input.
26 static __isl_give isl_pw_qpolynomial *compute_input_size(PDG *pdg,
27 struct options *options)
29 unsigned nparam = pdg->params.size();
30 isl_space *space;
31 isl_pw_qpolynomial *inputsize;
33 space = isl_space_alloc(pdg->get_isl_ctx(), nparam, 0, 1);
34 isl_dim_set_parameter_names(space, pdg->params);
35 inputsize = isl_pw_qpolynomial_zero(space);
37 if (!options->with_input)
38 return inputsize;
40 for (int i = 0; i < pdg->dependences.size(); ++i) {
41 isl_map *map;
42 isl_pw_qpolynomial *size;
43 pdg::dependence *dep = pdg->dependences[i];
45 if (dep->type != pdg::dependence::uninitialized)
46 continue;
48 map = dep->relation->get_isl_map(pdg->get_isl_ctx());
49 size = isl_map_card(map);
50 inputsize = isl_pw_qpolynomial_add(inputsize, size);
53 return inputsize;
56 /* Compute the number of elements in domain for a fixed value
57 * of the "fixed_loops" outer loop iterators and return this value
58 * as a function of these loop iterators and the parameters.
60 static __isl_give isl_pw_qpolynomial *compute_accesses_at(
61 __isl_take isl_set *domain, int fixed_loops)
63 isl_map *map;
64 map = isl_map_from_range(domain);
65 map = isl_map_move_dims(map, isl_dim_in, 0, isl_dim_out, 0, fixed_loops);
66 return isl_map_card(map);
69 /* Compute the number of elements in domain for a fixed value
70 * of the "level" outer loop iterators and for all values of the
71 * loop iterator at level "level" smaller than a fixed value
72 * and return this value as a function of these (level+1) loop iterators
73 * and the parameters.
75 static __isl_give isl_pw_qpolynomial *compute_accesses_before(
76 __isl_take isl_set *domain, int level)
78 isl_map *map;
79 map = isl_map_from_range(domain);
80 map = isl_map_move_dims(map, isl_dim_in, 0, isl_dim_out, 0, level);
81 map = isl_map_add_dims(map, isl_dim_in, 1);
82 map = isl_map_order_lt(map, isl_dim_out, 0, isl_dim_in, level);
83 return isl_map_card(map);
86 static int cmp_prefix_stat_dims(pdg::node *node, int *stat_dims, int d)
88 for (int j = 0; j < d; ++j)
89 if (node->prefix[2*j] != stat_dims[j])
90 return node->prefix[2*j] - stat_dims[j];
92 return 0;
96 * Returns true if dependence is independent of first n_loops loops.
98 static int is_loop_independent(isl_ctx *ctx,
99 pdg::IslMap *relation, int n_loops)
101 isl_map *rel;
102 isl_map *test;
103 assert(relation->input() >= n_loops);
104 assert(relation->output() >= n_loops);
105 int indep;
107 rel = relation->get_isl_map(ctx);
108 test = isl_map_universe(isl_map_get_space(rel));
109 for (int i = 0; i < n_loops; ++i)
110 test = isl_map_equate(test, isl_dim_in, i, isl_dim_out, i);
111 indep = isl_map_is_subset(rel, test);
112 isl_map_free(rel);
113 isl_map_free(test);
115 return indep;
118 struct contribution {
119 unsigned is_read : 1;
120 unsigned is_written : 1;
124 * Determines the net contribution of "dep" within statements
125 * with statement level dimensions prefixed by stat_dims
126 * and within a fixed iteration of the first n_loop loops.
128 static struct contribution net_contribution(isl_ctx *ctx, pdg::dependence *dep,
129 int *stat_dims, int d, int n_loop,
130 struct options *options)
132 struct contribution c;
133 c.is_read = 0;
134 c.is_written = 0;
136 if (dep->type == pdg::dependence::uninitialized) {
137 if (options->with_input &&
138 cmp_prefix_stat_dims(dep->to, stat_dims, d) == 0)
139 c.is_read = 1;
140 } else {
141 int is_written = cmp_prefix_stat_dims(dep->from, stat_dims, d) == 0;
142 int is_read = cmp_prefix_stat_dims(dep->to, stat_dims, d) == 0;
143 int is_local = is_written && is_read &&
144 is_loop_independent(ctx, dep->relation, n_loop);
145 if (!is_local) {
146 c.is_read = is_read;
147 c.is_written = is_written;
151 return c;
154 struct access {
155 isl_set *read;
156 isl_set *write;
160 * Compute increment in statement/loop identified by stat_dims
162 static __isl_give isl_pw_qpolynomial *compute_live_increment(PDG *pdg,
163 int *stat_dims, int d, struct access *accesses, struct options *options)
165 unsigned nparam = pdg->params.size();
166 isl_space *space;
167 isl_pw_qpolynomial *sum;
169 space = isl_space_alloc(pdg->get_isl_ctx(), nparam, d - 1, 1);
170 isl_dim_set_parameter_names(space, pdg->params);
171 sum = isl_pw_qpolynomial_zero(space);
172 for (int i = 0; i < pdg->dependences.size(); ++i) {
173 pdg::dependence *dep = pdg->dependences[i];
174 struct contribution c;
176 c = net_contribution(pdg->get_isl_ctx(),
177 dep, stat_dims, d, d-1, options);
179 if (c.is_written) {
180 isl_pw_qpolynomial *inc;
181 inc = compute_accesses_at(isl_set_copy(accesses[i].write), d - 1);
182 sum = isl_pw_qpolynomial_add(sum, inc);
184 if (c.is_read) {
185 isl_pw_qpolynomial *dec;
186 dec = compute_accesses_at(isl_set_copy(accesses[i].read), d - 1);
187 sum = isl_pw_qpolynomial_sub(sum, dec);
190 return sum;
194 * Compute increment in previous iterations of loop at given level.
196 static __isl_give isl_pw_qpolynomial *compute_live_increment_in_loop(PDG *pdg,
197 int *stat_dims, int level, struct access *accesses,
198 struct options *options)
200 unsigned nparam = pdg->params.size();
201 isl_space *space;
202 isl_pw_qpolynomial *sum;
204 space = isl_space_alloc(pdg->get_isl_ctx(), nparam, level + 1, 1);
205 isl_dim_set_parameter_names(space, pdg->params);
206 sum = isl_pw_qpolynomial_zero(space);
207 for (int i = 0; i < pdg->dependences.size(); ++i) {
208 pdg::dependence *dep = pdg->dependences[i];
209 struct contribution c;
211 c = net_contribution(pdg->get_isl_ctx(),
212 dep, stat_dims, level+1, level+1, options);
214 if (c.is_written) {
215 isl_pw_qpolynomial *inc;
216 inc = compute_accesses_before(isl_set_copy(accesses[i].write), level);
217 sum = isl_pw_qpolynomial_add(sum, inc);
219 if (c.is_read) {
220 isl_pw_qpolynomial *dec;
221 dec = compute_accesses_before(isl_set_copy(accesses[i].read), level);
222 sum = isl_pw_qpolynomial_sub(sum, dec);
225 return sum;
228 struct tree;
230 struct tree {
231 int d;
232 int *stat_dims;
233 vector<struct tree*> children;
234 isl_set *domain;
238 * Compute bound on number of live array elements in the
239 * given (sub)tree.
241 * live_before: number of live elements before the loop/statement
242 * in terms of the outer loop iterators
244 static __isl_give isl_pw_qpolynomial_fold *compute_bound(PDG *pdg,
245 __isl_take isl_pw_qpolynomial *live,
246 struct tree *tree,
247 struct access *accesses,
248 struct options *options)
250 /* If we are in a leaf, we actually compute the bound. */
251 if (tree->children.size() == 0) {
252 isl_set *domain;
253 isl_pw_qpolynomial_fold *bound;
255 domain = isl_set_copy(tree->domain);
256 domain = isl_set_reset_tuple_id(domain);
257 live = isl_pw_qpolynomial_intersect_domain(live, domain);
258 bound = isl_pw_qpolynomial_bound(live, isl_fold_max, NULL);
260 return bound;
263 isl_pw_qpolynomial_fold *pwf;
265 /* If we are not in the outer dummy loop, add contribution of
266 * previous iterations.
268 if (tree->d > 0) {
269 isl_space *dim;
270 isl_pw_qpolynomial *inc;
272 live = isl_pw_qpolynomial_add_dims(live, isl_dim_in, 1);
273 inc = compute_live_increment_in_loop(pdg, tree->stat_dims, tree->d-1,
274 accesses, options);
275 live = isl_pw_qpolynomial_add(live, inc);
278 pwf = compute_bound(pdg, isl_pw_qpolynomial_copy(live),
279 tree->children[0], accesses, options);
280 for (int i = 1; i < tree->children.size(); ++i) {
281 isl_pw_qpolynomial_fold *pwf_i;
282 isl_pw_qpolynomial *inc;
283 struct tree *child = tree->children[i-1];
284 inc = compute_live_increment(pdg, child->stat_dims, child->d,
285 accesses, options);
286 live = isl_pw_qpolynomial_add(live, inc);
287 pwf_i = compute_bound(pdg, isl_pw_qpolynomial_copy(live),
288 tree->children[i], accesses, options);
289 pwf = isl_pw_qpolynomial_fold_fold(pwf, pwf_i);
291 isl_pw_qpolynomial_free(live);
292 return pwf;
296 * Construct a tree for all statements whose statement dimensions
297 * start with the d first elements of stat_dims.
298 * The first one of those statemens is statement n.
300 static struct tree *compute_tree(PDG *pdg, int *stat_dims, int d, int n)
302 struct tree *tree = new struct tree;
303 tree->domain = NULL;
304 tree->d = d;
305 tree->stat_dims = new int[d];
306 for (int i = 0; i < d; ++i)
307 tree->stat_dims[i] = stat_dims[i];
309 if (pdg->nodes[n]->prefix.size() == 2*d-1) {
310 tree->domain = pdg->nodes[n]->source->get_isl_set(pdg->get_isl_ctx());
311 return tree;
314 for (int i = n; i < pdg->nodes.size(); ++i) {
315 if (cmp_prefix_stat_dims(pdg->nodes[i], stat_dims, d) > 0)
316 break;
318 stat_dims[d] = pdg->nodes[i]->prefix[2*d];
320 struct tree *child = compute_tree(pdg, stat_dims, d+1, i);
321 tree->children.push_back(child);
323 /* Skip over nodes with the same prefix at current level */
324 for (; i+1 < pdg->nodes.size(); ++i)
325 if (cmp_prefix_stat_dims(pdg->nodes[i+1], stat_dims, d+1) != 0)
326 break;
328 return tree;
331 static struct tree *compute_tree(PDG *pdg)
333 int n_stat_dims = normalize_prefix(pdg);
334 int stat_dims[n_stat_dims];
336 return compute_tree(pdg, stat_dims, 0, 0);
339 static void free_tree(struct tree *tree)
341 for (int i = 0; i < tree->children.size(); ++i)
342 free_tree(tree->children[i]);
343 delete [] tree->stat_dims;
344 isl_set_free(tree->domain);
345 delete tree;
348 static void compute_bound(PDG *pdg, struct options *options)
350 struct tree *tree = compute_tree(pdg);
352 struct access accesses[pdg->dependences.size()];
353 for (int i = 0; i < pdg->dependences.size(); ++i) {
354 pdg::dependence *dep = pdg->dependences[i];
355 isl_map *rel = dep->relation->get_isl_map(pdg->get_isl_ctx());
357 accesses[i].write = isl_map_domain(isl_map_copy(rel));
358 accesses[i].read = isl_map_range(rel);
361 isl_pw_qpolynomial *inputsize;
362 inputsize = compute_input_size(pdg, options);
364 isl_pw_qpolynomial_fold *pwf;
365 pwf = compute_bound(pdg, inputsize, tree, accesses, options);
366 free_tree(tree);
367 isl_set *context = pdg->get_context_isl_set();
368 pwf = isl_pw_qpolynomial_fold_gist(pwf, context);
370 isl_printer *printer;
371 printer = isl_printer_to_file(pdg->get_isl_ctx(), stdout);
372 printer = isl_printer_set_output_format(printer, ISL_FORMAT_C);
373 printer = isl_printer_print_pw_qpolynomial_fold(printer, pwf);
374 printer = isl_printer_end_line(printer);
375 isl_printer_free(printer);
377 isl_pw_qpolynomial_fold_free(pwf);
379 for (int i = 0; i < pdg->dependences.size(); ++i) {
380 isl_set_free(accesses[i].write);
381 isl_set_free(accesses[i].read);
385 int main(int argc, char * argv[])
387 PDG *pdg;
388 isl_ctx *ctx = isl_ctx_alloc();
389 struct options *options = options_new_with_defaults();
391 argc = options_parse(options, argc, argv, ISL_ARG_ALL);
394 pdg = PDG::Load(stdin, ctx);
395 int nparam = pdg->params.size();
397 for (int i = 0; i < pdg->arrays.size(); ++i)
398 find_deps(pdg, pdg->arrays[i], data_reuse);
400 compute_bound(pdg, options);
402 pdg->free();
403 delete pdg;
404 options_free(options);
405 isl_ctx_free(ctx);
407 return 0;