[AMDGPU] Test codegen'ing True16 additions.
[llvm-project.git] / polly / lib / External / isl / isl_scheduler_clustering.c
blobc2e91c90546ab5a691a3f0f66843a735aff367c9
1 /*
2 * Copyright 2015 Sven Verdoolaege
4 * Use of this software is governed by the MIT license
6 * Written by Sven Verdoolaege
7 */
9 #include "isl_map_private.h"
11 #include <isl/id.h>
12 #include <isl/schedule_node.h>
13 #include <isl/union_set.h>
15 #include "isl_mat_private.h"
16 #include "isl_scheduler_clustering.h"
17 #include "isl_scheduler_scc.h"
18 #include "isl_seq.h"
19 #include "isl_tarjan.h"
21 /* Initialize the clustering data structure "c" from "graph".
23 * In particular, allocate memory, extract the SCCs from "graph"
24 * into c->scc, initialize scc_cluster and construct
25 * a band of schedule rows for each SCC.
26 * Within each SCC, there is only one SCC by definition.
27 * Each SCC initially belongs to a cluster containing only that SCC.
29 static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
30 struct isl_sched_graph *graph)
32 int i;
34 c->n = graph->scc;
35 c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
36 c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
37 c->scc_cluster = isl_calloc_array(ctx, int, c->n);
38 c->scc_node = isl_calloc_array(ctx, int, c->n);
39 c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
40 if (!c->scc || !c->cluster ||
41 !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
42 return isl_stat_error;
44 for (i = 0; i < c->n; ++i) {
45 if (isl_sched_graph_extract_sub_graph(ctx, graph,
46 &isl_sched_node_scc_exactly,
47 &isl_sched_edge_scc_exactly,
48 i, &c->scc[i]) < 0)
49 return isl_stat_error;
50 c->scc[i].scc = 1;
51 if (isl_sched_graph_compute_maxvar(&c->scc[i]) < 0)
52 return isl_stat_error;
53 if (isl_schedule_node_compute_wcc_band(ctx, &c->scc[i]) < 0)
54 return isl_stat_error;
55 c->scc_cluster[i] = i;
58 return isl_stat_ok;
61 /* Free all memory allocated for "c".
63 static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
65 int i;
67 if (c->scc)
68 for (i = 0; i < c->n; ++i)
69 isl_sched_graph_free(ctx, &c->scc[i]);
70 free(c->scc);
71 if (c->cluster)
72 for (i = 0; i < c->n; ++i)
73 isl_sched_graph_free(ctx, &c->cluster[i]);
74 free(c->cluster);
75 free(c->scc_cluster);
76 free(c->scc_node);
77 free(c->scc_in_merge);
80 /* Should we refrain from merging the cluster in "graph" with
81 * any other cluster?
82 * In particular, is its current schedule band empty and incomplete.
84 static int bad_cluster(struct isl_sched_graph *graph)
86 return graph->n_row < graph->maxvar &&
87 graph->n_total_row == graph->band_start;
90 /* Is "edge" a proximity edge with a non-empty dependence relation?
92 static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
94 if (!isl_sched_edge_is_proximity(edge))
95 return isl_bool_false;
96 return isl_bool_not(isl_map_plain_is_empty(edge->map));
99 /* Return the index of an edge in "graph" that can be used to merge
100 * two clusters in "c".
101 * Return graph->n_edge if no such edge can be found.
102 * Return -1 on error.
104 * In particular, return a proximity edge between two clusters
105 * that is not marked "no_merge" and such that neither of the
106 * two clusters has an incomplete, empty band.
108 * If there are multiple such edges, then try and find the most
109 * appropriate edge to use for merging. In particular, pick the edge
110 * with the greatest weight. If there are multiple of those,
111 * then pick one with the shortest distance between
112 * the two cluster representatives.
114 static int find_proximity(struct isl_sched_graph *graph,
115 struct isl_clustering *c)
117 int i, best = graph->n_edge, best_dist, best_weight;
119 for (i = 0; i < graph->n_edge; ++i) {
120 struct isl_sched_edge *edge = &graph->edge[i];
121 int dist, weight;
122 isl_bool prox;
124 prox = is_non_empty_proximity(edge);
125 if (prox < 0)
126 return -1;
127 if (!prox)
128 continue;
129 if (edge->no_merge)
130 continue;
131 if (bad_cluster(&c->scc[edge->src->scc]) ||
132 bad_cluster(&c->scc[edge->dst->scc]))
133 continue;
134 dist = c->scc_cluster[edge->dst->scc] -
135 c->scc_cluster[edge->src->scc];
136 if (dist == 0)
137 continue;
138 weight = edge->weight;
139 if (best < graph->n_edge) {
140 if (best_weight > weight)
141 continue;
142 if (best_weight == weight && best_dist <= dist)
143 continue;
145 best = i;
146 best_dist = dist;
147 best_weight = weight;
150 return best;
153 /* Internal data structure used in mark_merge_sccs.
155 * "graph" is the dependence graph in which a strongly connected
156 * component is constructed.
157 * "scc_cluster" maps each SCC index to the cluster to which it belongs.
158 * "src" and "dst" are the indices of the nodes that are being merged.
160 struct isl_mark_merge_sccs_data {
161 struct isl_sched_graph *graph;
162 int *scc_cluster;
163 int src;
164 int dst;
167 /* Check whether the cluster containing node "i" depends on the cluster
168 * containing node "j". If "i" and "j" belong to the same cluster,
169 * then they are taken to depend on each other to ensure that
170 * the resulting strongly connected component consists of complete
171 * clusters. Furthermore, if "i" and "j" are the two nodes that
172 * are being merged, then they are taken to depend on each other as well.
173 * Otherwise, check if there is a (conditional) validity dependence
174 * from node[j] to node[i], forcing node[i] to follow node[j].
176 static isl_bool cluster_follows(int i, int j, void *user)
178 struct isl_mark_merge_sccs_data *data = user;
179 struct isl_sched_graph *graph = data->graph;
180 int *scc_cluster = data->scc_cluster;
182 if (data->src == i && data->dst == j)
183 return isl_bool_true;
184 if (data->src == j && data->dst == i)
185 return isl_bool_true;
186 if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
187 return isl_bool_true;
189 return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
190 &graph->node[i]);
193 /* Mark all SCCs that belong to either of the two clusters in "c"
194 * connected by the edge in "graph" with index "edge", or to any
195 * of the intermediate clusters.
196 * The marking is recorded in c->scc_in_merge.
198 * The given edge has been selected for merging two clusters,
199 * meaning that there is at least a proximity edge between the two nodes.
200 * However, there may also be (indirect) validity dependences
201 * between the two nodes. When merging the two clusters, all clusters
202 * containing one or more of the intermediate nodes along the
203 * indirect validity dependences need to be merged in as well.
205 * First collect all such nodes by computing the strongly connected
206 * component (SCC) containing the two nodes connected by the edge, where
207 * the two nodes are considered to depend on each other to make
208 * sure they end up in the same SCC. Similarly, each node is considered
209 * to depend on every other node in the same cluster to ensure
210 * that the SCC consists of complete clusters.
212 * Then the original SCCs that contain any of these nodes are marked
213 * in c->scc_in_merge.
215 static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
216 int edge, struct isl_clustering *c)
218 struct isl_mark_merge_sccs_data data;
219 struct isl_tarjan_graph *g;
220 int i;
222 for (i = 0; i < c->n; ++i)
223 c->scc_in_merge[i] = 0;
225 data.graph = graph;
226 data.scc_cluster = c->scc_cluster;
227 data.src = graph->edge[edge].src - graph->node;
228 data.dst = graph->edge[edge].dst - graph->node;
230 g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
231 &cluster_follows, &data);
232 if (!g)
233 goto error;
235 i = g->op;
236 if (i < 3)
237 isl_die(ctx, isl_error_internal,
238 "expecting at least two nodes in component",
239 goto error);
240 if (g->order[--i] != -1)
241 isl_die(ctx, isl_error_internal,
242 "expecting end of component marker", goto error);
244 for (--i; i >= 0 && g->order[i] != -1; --i) {
245 int scc = graph->node[g->order[i]].scc;
246 c->scc_in_merge[scc] = 1;
249 isl_tarjan_graph_free(g);
250 return isl_stat_ok;
251 error:
252 isl_tarjan_graph_free(g);
253 return isl_stat_error;
256 /* Construct the identifier "cluster_i".
258 static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
260 char name[40];
262 snprintf(name, sizeof(name), "cluster_%d", i);
263 return isl_id_alloc(ctx, name, NULL);
266 /* Construct the space of the cluster with index "i" containing
267 * the strongly connected component "scc".
269 * In particular, construct a space called cluster_i with dimension equal
270 * to the number of schedule rows in the current band of "scc".
272 static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
274 int nvar;
275 isl_space *space;
276 isl_id *id;
278 nvar = scc->n_total_row - scc->band_start;
279 space = isl_space_copy(scc->node[0].space);
280 space = isl_space_params(space);
281 space = isl_space_set_from_params(space);
282 space = isl_space_add_dims(space, isl_dim_set, nvar);
283 id = cluster_id(isl_space_get_ctx(space), i);
284 space = isl_space_set_tuple_id(space, isl_dim_set, id);
286 return space;
289 /* Collect the domain of the graph for merging clusters.
291 * In particular, for each cluster with first SCC "i", construct
292 * a set in the space called cluster_i with dimension equal
293 * to the number of schedule rows in the current band of the cluster.
295 static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
296 struct isl_sched_graph *graph, struct isl_clustering *c)
298 int i;
299 isl_space *space;
300 isl_union_set *domain;
302 space = isl_space_params_alloc(ctx, 0);
303 domain = isl_union_set_empty(space);
305 for (i = 0; i < graph->scc; ++i) {
306 isl_space *space;
308 if (!c->scc_in_merge[i])
309 continue;
310 if (c->scc_cluster[i] != i)
311 continue;
312 space = cluster_space(&c->scc[i], i);
313 domain = isl_union_set_add_set(domain, isl_set_universe(space));
316 return domain;
319 /* Construct a map from the original instances to the corresponding
320 * cluster instance in the current bands of the clusters in "c".
322 static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
323 struct isl_sched_graph *graph, struct isl_clustering *c)
325 int i, j;
326 isl_space *space;
327 isl_union_map *cluster_map;
329 space = isl_space_params_alloc(ctx, 0);
330 cluster_map = isl_union_map_empty(space);
331 for (i = 0; i < graph->scc; ++i) {
332 int start, n;
333 isl_id *id;
335 if (!c->scc_in_merge[i])
336 continue;
338 id = cluster_id(ctx, c->scc_cluster[i]);
339 start = c->scc[i].band_start;
340 n = c->scc[i].n_total_row - start;
341 for (j = 0; j < c->scc[i].n; ++j) {
342 isl_multi_aff *ma;
343 isl_map *map;
344 struct isl_sched_node *node = &c->scc[i].node[j];
346 ma = isl_sched_node_extract_partial_schedule_multi_aff(
347 node, start, n);
348 ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
349 isl_id_copy(id));
350 map = isl_map_from_multi_aff(ma);
351 cluster_map = isl_union_map_add_map(cluster_map, map);
353 isl_id_free(id);
356 return cluster_map;
359 /* Add "umap" to the schedule constraints "sc" of all types of "edge"
360 * that are not isl_edge_condition or isl_edge_conditional_validity.
362 static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
363 struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
364 __isl_take isl_schedule_constraints *sc)
366 enum isl_edge_type t;
368 if (!sc)
369 return NULL;
371 for (t = isl_edge_first; t <= isl_edge_last; ++t) {
372 if (t == isl_edge_condition ||
373 t == isl_edge_conditional_validity)
374 continue;
375 if (!isl_sched_edge_has_type(edge, t))
376 continue;
377 sc = isl_schedule_constraints_add(sc, t,
378 isl_union_map_copy(umap));
381 return sc;
384 /* Add schedule constraints of types isl_edge_condition and
385 * isl_edge_conditional_validity to "sc" by applying "umap" to
386 * the domains of the wrapped relations in domain and range
387 * of the corresponding tagged constraints of "edge".
389 static __isl_give isl_schedule_constraints *add_conditional_constraints(
390 struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
391 __isl_take isl_schedule_constraints *sc)
393 enum isl_edge_type t;
394 isl_union_map *tagged;
396 for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
397 if (!isl_sched_edge_has_type(edge, t))
398 continue;
399 if (t == isl_edge_condition)
400 tagged = isl_union_map_copy(edge->tagged_condition);
401 else
402 tagged = isl_union_map_copy(edge->tagged_validity);
403 tagged = isl_union_map_zip(tagged);
404 tagged = isl_union_map_apply_domain(tagged,
405 isl_union_map_copy(umap));
406 tagged = isl_union_map_zip(tagged);
407 sc = isl_schedule_constraints_add(sc, t, tagged);
408 if (!sc)
409 return NULL;
412 return sc;
415 /* Given a mapping "cluster_map" from the original instances to
416 * the cluster instances, add schedule constraints on the clusters
417 * to "sc" corresponding to the original constraints represented by "edge".
419 * For non-tagged dependence constraints, the cluster constraints
420 * are obtained by applying "cluster_map" to the edge->map.
422 * For tagged dependence constraints, "cluster_map" needs to be applied
423 * to the domains of the wrapped relations in domain and range
424 * of the tagged dependence constraints. Pick out the mappings
425 * from these domains from "cluster_map" and construct their product.
426 * This mapping can then be applied to the pair of domains.
428 static __isl_give isl_schedule_constraints *collect_edge_constraints(
429 struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
430 __isl_take isl_schedule_constraints *sc)
432 isl_union_map *umap;
433 isl_space *space;
434 isl_union_set *uset;
435 isl_union_map *umap1, *umap2;
437 if (!sc)
438 return NULL;
440 umap = isl_union_map_from_map(isl_map_copy(edge->map));
441 umap = isl_union_map_apply_domain(umap,
442 isl_union_map_copy(cluster_map));
443 umap = isl_union_map_apply_range(umap,
444 isl_union_map_copy(cluster_map));
445 sc = add_non_conditional_constraints(edge, umap, sc);
446 isl_union_map_free(umap);
448 if (!sc ||
449 (!isl_sched_edge_is_condition(edge) &&
450 !isl_sched_edge_is_conditional_validity(edge)))
451 return sc;
453 space = isl_space_domain(isl_map_get_space(edge->map));
454 uset = isl_union_set_from_set(isl_set_universe(space));
455 umap1 = isl_union_map_copy(cluster_map);
456 umap1 = isl_union_map_intersect_domain(umap1, uset);
457 space = isl_space_range(isl_map_get_space(edge->map));
458 uset = isl_union_set_from_set(isl_set_universe(space));
459 umap2 = isl_union_map_copy(cluster_map);
460 umap2 = isl_union_map_intersect_domain(umap2, uset);
461 umap = isl_union_map_product(umap1, umap2);
463 sc = add_conditional_constraints(edge, umap, sc);
465 isl_union_map_free(umap);
466 return sc;
469 /* Given a mapping "cluster_map" from the original instances to
470 * the cluster instances, add schedule constraints on the clusters
471 * to "sc" corresponding to all edges in "graph" between nodes that
472 * belong to SCCs that are marked for merging in "scc_in_merge".
474 static __isl_give isl_schedule_constraints *collect_constraints(
475 struct isl_sched_graph *graph, int *scc_in_merge,
476 __isl_keep isl_union_map *cluster_map,
477 __isl_take isl_schedule_constraints *sc)
479 int i;
481 for (i = 0; i < graph->n_edge; ++i) {
482 struct isl_sched_edge *edge = &graph->edge[i];
484 if (!scc_in_merge[edge->src->scc])
485 continue;
486 if (!scc_in_merge[edge->dst->scc])
487 continue;
488 sc = collect_edge_constraints(edge, cluster_map, sc);
491 return sc;
494 /* Construct a dependence graph for scheduling clusters with respect
495 * to each other and store the result in "merge_graph".
496 * In particular, the nodes of the graph correspond to the schedule
497 * dimensions of the current bands of those clusters that have been
498 * marked for merging in "c".
500 * First construct an isl_schedule_constraints object for this domain
501 * by transforming the edges in "graph" to the domain.
502 * Then initialize a dependence graph for scheduling from these
503 * constraints.
505 static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
506 struct isl_clustering *c, struct isl_sched_graph *merge_graph)
508 isl_union_set *domain;
509 isl_union_map *cluster_map;
510 isl_schedule_constraints *sc;
511 isl_stat r;
513 domain = collect_domain(ctx, graph, c);
514 sc = isl_schedule_constraints_on_domain(domain);
515 if (!sc)
516 return isl_stat_error;
517 cluster_map = collect_cluster_map(ctx, graph, c);
518 sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
519 isl_union_map_free(cluster_map);
521 r = isl_sched_graph_init(merge_graph, sc);
523 isl_schedule_constraints_free(sc);
525 return r;
528 /* Compute the maximal number of remaining schedule rows that still need
529 * to be computed for the nodes that belong to clusters with the maximal
530 * dimension for the current band (i.e., the band that is to be merged).
531 * Only clusters that are about to be merged are considered.
532 * "maxvar" is the maximal dimension for the current band.
533 * "c" contains information about the clusters.
535 * Return the maximal number of remaining schedule rows or
536 * isl_size_error on error.
538 static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
540 int i, j;
541 int max_slack;
543 max_slack = 0;
544 for (i = 0; i < c->n; ++i) {
545 int nvar;
546 struct isl_sched_graph *scc;
548 if (!c->scc_in_merge[i])
549 continue;
550 scc = &c->scc[i];
551 nvar = scc->n_total_row - scc->band_start;
552 if (nvar != maxvar)
553 continue;
554 for (j = 0; j < scc->n; ++j) {
555 struct isl_sched_node *node = &scc->node[j];
556 int slack;
558 if (isl_sched_node_update_vmap(node) < 0)
559 return isl_size_error;
560 slack = node->nvar - node->rank;
561 if (slack > max_slack)
562 max_slack = slack;
566 return max_slack;
569 /* If there are any clusters where the dimension of the current band
570 * (i.e., the band that is to be merged) is smaller than "maxvar" and
571 * if there are any nodes in such a cluster where the number
572 * of remaining schedule rows that still need to be computed
573 * is greater than "max_slack", then return the smallest current band
574 * dimension of all these clusters. Otherwise return the original value
575 * of "maxvar". Return isl_size_error in case of any error.
576 * Only clusters that are about to be merged are considered.
577 * "c" contains information about the clusters.
579 static isl_size limit_maxvar_to_slack(int maxvar, int max_slack,
580 struct isl_clustering *c)
582 int i, j;
584 for (i = 0; i < c->n; ++i) {
585 int nvar;
586 struct isl_sched_graph *scc;
588 if (!c->scc_in_merge[i])
589 continue;
590 scc = &c->scc[i];
591 nvar = scc->n_total_row - scc->band_start;
592 if (nvar >= maxvar)
593 continue;
594 for (j = 0; j < scc->n; ++j) {
595 struct isl_sched_node *node = &scc->node[j];
596 int slack;
598 if (isl_sched_node_update_vmap(node) < 0)
599 return isl_size_error;
600 slack = node->nvar - node->rank;
601 if (slack > max_slack) {
602 maxvar = nvar;
603 break;
608 return maxvar;
611 /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
612 * that still need to be computed. In particular, if there is a node
613 * in a cluster where the dimension of the current band is smaller
614 * than merge_graph->maxvar, but the number of remaining schedule rows
615 * is greater than that of any node in a cluster with the maximal
616 * dimension for the current band (i.e., merge_graph->maxvar),
617 * then adjust merge_graph->maxvar to the (smallest) current band dimension
618 * of those clusters. Without this adjustment, the total number of
619 * schedule dimensions would be increased, resulting in a skewed view
620 * of the number of coincident dimensions.
621 * "c" contains information about the clusters.
623 * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
624 * then there is no point in attempting any merge since it will be rejected
625 * anyway. Set merge_graph->maxvar to zero in such cases.
627 static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
628 struct isl_sched_graph *merge_graph, struct isl_clustering *c)
630 isl_size max_slack, maxvar;
632 max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
633 if (max_slack < 0)
634 return isl_stat_error;
635 maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
636 if (maxvar < 0)
637 return isl_stat_error;
639 if (maxvar < merge_graph->maxvar) {
640 if (isl_options_get_schedule_maximize_band_depth(ctx))
641 merge_graph->maxvar = 0;
642 else
643 merge_graph->maxvar = maxvar;
646 return isl_stat_ok;
649 /* Return the number of coincident dimensions in the current band of "graph",
650 * where the nodes of "graph" are assumed to be scheduled by a single band.
652 static int get_n_coincident(struct isl_sched_graph *graph)
654 int i;
656 for (i = graph->band_start; i < graph->n_total_row; ++i)
657 if (!graph->node[0].coincident[i])
658 break;
660 return i - graph->band_start;
663 /* Should the clusters be merged based on the cluster schedule
664 * in the current (and only) band of "merge_graph", given that
665 * coincidence should be maximized?
667 * If the number of coincident schedule dimensions in the merged band
668 * would be less than the maximal number of coincident schedule dimensions
669 * in any of the merged clusters, then the clusters should not be merged.
671 static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
672 struct isl_sched_graph *merge_graph)
674 int i;
675 int n_coincident;
676 int max_coincident;
678 max_coincident = 0;
679 for (i = 0; i < c->n; ++i) {
680 if (!c->scc_in_merge[i])
681 continue;
682 n_coincident = get_n_coincident(&c->scc[i]);
683 if (n_coincident > max_coincident)
684 max_coincident = n_coincident;
687 n_coincident = get_n_coincident(merge_graph);
689 return isl_bool_ok(n_coincident >= max_coincident);
692 /* Return the transformation on "node" expressed by the current (and only)
693 * band of "merge_graph" applied to the clusters in "c".
695 * First find the representation of "node" in its SCC in "c" and
696 * extract the transformation expressed by the current band.
697 * Then extract the transformation applied by "merge_graph"
698 * to the cluster to which this SCC belongs.
699 * Combine the two to obtain the complete transformation on the node.
701 * Note that the range of the first transformation is an anonymous space,
702 * while the domain of the second is named "cluster_X". The range
703 * of the former therefore needs to be adjusted before the two
704 * can be combined.
706 static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
707 struct isl_sched_node *node, struct isl_clustering *c,
708 struct isl_sched_graph *merge_graph)
710 struct isl_sched_node *scc_node, *cluster_node;
711 int start, n;
712 isl_id *id;
713 isl_space *space;
714 isl_multi_aff *ma, *ma2;
716 scc_node = isl_sched_graph_find_node(ctx, &c->scc[node->scc],
717 node->space);
718 if (scc_node && !isl_sched_graph_is_node(&c->scc[node->scc], scc_node))
719 isl_die(ctx, isl_error_internal, "unable to find node",
720 return NULL);
721 start = c->scc[node->scc].band_start;
722 n = c->scc[node->scc].n_total_row - start;
723 ma = isl_sched_node_extract_partial_schedule_multi_aff(scc_node,
724 start, n);
725 space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
726 cluster_node = isl_sched_graph_find_node(ctx, merge_graph, space);
727 if (cluster_node && !isl_sched_graph_is_node(merge_graph, cluster_node))
728 isl_die(ctx, isl_error_internal, "unable to find cluster",
729 space = isl_space_free(space));
730 id = isl_space_get_tuple_id(space, isl_dim_set);
731 ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
732 isl_space_free(space);
733 n = merge_graph->n_total_row;
734 ma2 = isl_sched_node_extract_partial_schedule_multi_aff(cluster_node,
735 0, n);
736 ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
738 return isl_map_from_multi_aff(ma);
741 /* Give a set of distances "set", are they bounded by a small constant
742 * in direction "pos"?
743 * In practice, check if they are bounded by 2 by checking that there
744 * are no elements with a value greater than or equal to 3 or
745 * smaller than or equal to -3.
747 static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
749 isl_bool bounded;
750 isl_set *test;
752 if (!set)
753 return isl_bool_error;
755 test = isl_set_copy(set);
756 test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
757 bounded = isl_set_is_empty(test);
758 isl_set_free(test);
760 if (bounded < 0 || !bounded)
761 return bounded;
763 test = isl_set_copy(set);
764 test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
765 bounded = isl_set_is_empty(test);
766 isl_set_free(test);
768 return bounded;
771 /* Does the set "set" have a fixed (but possible parametric) value
772 * at dimension "pos"?
774 static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
776 isl_size n;
777 isl_bool single;
779 n = isl_set_dim(set, isl_dim_set);
780 if (n < 0)
781 return isl_bool_error;
782 set = isl_set_copy(set);
783 set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
784 set = isl_set_project_out(set, isl_dim_set, 0, pos);
785 single = isl_set_is_singleton(set);
786 isl_set_free(set);
788 return single;
791 /* Does "map" have a fixed (but possible parametric) value
792 * at dimension "pos" of either its domain or its range?
794 static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
796 isl_set *set;
797 isl_bool single;
799 set = isl_map_domain(isl_map_copy(map));
800 single = has_single_value(set, pos);
801 isl_set_free(set);
803 if (single < 0 || single)
804 return single;
806 set = isl_map_range(isl_map_copy(map));
807 single = has_single_value(set, pos);
808 isl_set_free(set);
810 return single;
813 /* Does the edge "edge" from "graph" have bounded dependence distances
814 * in the merged graph "merge_graph" of a selection of clusters in "c"?
816 * Extract the complete transformations of the source and destination
817 * nodes of the edge, apply them to the edge constraints and
818 * compute the differences. Finally, check if these differences are bounded
819 * in each direction.
821 * If the dimension of the band is greater than the number of
822 * dimensions that can be expected to be optimized by the edge
823 * (based on its weight), then also allow the differences to be unbounded
824 * in the remaining dimensions, but only if either the source or
825 * the destination has a fixed value in that direction.
826 * This allows a statement that produces values that are used by
827 * several instances of another statement to be merged with that
828 * other statement.
829 * However, merging such clusters will introduce an inherently
830 * large proximity distance inside the merged cluster, meaning
831 * that proximity distances will no longer be optimized in
832 * subsequent merges. These merges are therefore only allowed
833 * after all other possible merges have been tried.
834 * The first time such a merge is encountered, the weight of the edge
835 * is replaced by a negative weight. The second time (i.e., after
836 * all merges over edges with a non-negative weight have been tried),
837 * the merge is allowed.
839 static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
840 struct isl_sched_graph *graph, struct isl_clustering *c,
841 struct isl_sched_graph *merge_graph)
843 int i, n_slack;
844 isl_size n;
845 isl_bool bounded;
846 isl_map *map, *t;
847 isl_set *dist;
849 map = isl_map_copy(edge->map);
850 t = extract_node_transformation(ctx, edge->src, c, merge_graph);
851 map = isl_map_apply_domain(map, t);
852 t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
853 map = isl_map_apply_range(map, t);
854 dist = isl_map_deltas(isl_map_copy(map));
856 bounded = isl_bool_true;
857 n = isl_set_dim(dist, isl_dim_set);
858 if (n < 0)
859 goto error;
860 n_slack = n - edge->weight;
861 if (edge->weight < 0)
862 n_slack -= graph->max_weight + 1;
863 for (i = 0; i < n; ++i) {
864 isl_bool bounded_i, singular_i;
866 bounded_i = distance_is_bounded(dist, i);
867 if (bounded_i < 0)
868 goto error;
869 if (bounded_i)
870 continue;
871 if (edge->weight >= 0)
872 bounded = isl_bool_false;
873 n_slack--;
874 if (n_slack < 0)
875 break;
876 singular_i = has_singular_src_or_dst(map, i);
877 if (singular_i < 0)
878 goto error;
879 if (singular_i)
880 continue;
881 bounded = isl_bool_false;
882 break;
884 if (!bounded && i >= n && edge->weight >= 0)
885 edge->weight -= graph->max_weight + 1;
886 isl_map_free(map);
887 isl_set_free(dist);
889 return bounded;
890 error:
891 isl_map_free(map);
892 isl_set_free(dist);
893 return isl_bool_error;
896 /* Should the clusters be merged based on the cluster schedule
897 * in the current (and only) band of "merge_graph"?
898 * "graph" is the original dependence graph, while "c" records
899 * which SCCs are involved in the latest merge.
901 * In particular, is there at least one proximity constraint
902 * that is optimized by the merge?
904 * A proximity constraint is considered to be optimized
905 * if the dependence distances are small.
907 static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
908 struct isl_sched_graph *graph, struct isl_clustering *c,
909 struct isl_sched_graph *merge_graph)
911 int i;
913 for (i = 0; i < graph->n_edge; ++i) {
914 struct isl_sched_edge *edge = &graph->edge[i];
915 isl_bool bounded;
917 if (!isl_sched_edge_is_proximity(edge))
918 continue;
919 if (!c->scc_in_merge[edge->src->scc])
920 continue;
921 if (!c->scc_in_merge[edge->dst->scc])
922 continue;
923 if (c->scc_cluster[edge->dst->scc] ==
924 c->scc_cluster[edge->src->scc])
925 continue;
926 bounded = has_bounded_distances(ctx, edge, graph, c,
927 merge_graph);
928 if (bounded < 0 || bounded)
929 return bounded;
932 return isl_bool_false;
935 /* Should the clusters be merged based on the cluster schedule
936 * in the current (and only) band of "merge_graph"?
937 * "graph" is the original dependence graph, while "c" records
938 * which SCCs are involved in the latest merge.
940 * If the current band is empty, then the clusters should not be merged.
942 * If the band depth should be maximized and the merge schedule
943 * is incomplete (meaning that the dimension of some of the schedule
944 * bands in the original schedule will be reduced), then the clusters
945 * should not be merged.
947 * If the schedule_maximize_coincidence option is set, then check that
948 * the number of coincident schedule dimensions is not reduced.
950 * Finally, only allow the merge if at least one proximity
951 * constraint is optimized.
953 static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
954 struct isl_clustering *c, struct isl_sched_graph *merge_graph)
956 if (merge_graph->n_total_row == merge_graph->band_start)
957 return isl_bool_false;
959 if (isl_options_get_schedule_maximize_band_depth(ctx) &&
960 merge_graph->n_total_row < merge_graph->maxvar)
961 return isl_bool_false;
963 if (isl_options_get_schedule_maximize_coincidence(ctx)) {
964 isl_bool ok;
966 ok = ok_to_merge_coincident(c, merge_graph);
967 if (ok < 0 || !ok)
968 return ok;
971 return ok_to_merge_proximity(ctx, graph, c, merge_graph);
974 /* Apply the schedule in "t_node" to the "n" rows starting at "first"
975 * of the schedule in "node" and return the result.
977 * That is, essentially compute
979 * T * N(first:first+n-1)
981 * taking into account the constant term and the parameter coefficients
982 * in "t_node".
984 static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
985 struct isl_sched_node *t_node, struct isl_sched_node *node,
986 int first, int n)
988 int i, j;
989 isl_mat *t;
990 isl_size n_row, n_col;
991 int n_param, n_var;
993 n_param = node->nparam;
994 n_var = node->nvar;
995 n_row = isl_mat_rows(t_node->sched);
996 n_col = isl_mat_cols(node->sched);
997 if (n_row < 0 || n_col < 0)
998 return NULL;
999 t = isl_mat_alloc(ctx, n_row, n_col);
1000 if (!t)
1001 return NULL;
1002 for (i = 0; i < n_row; ++i) {
1003 isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
1004 isl_seq_clr(t->row[i] + 1 + n_param, n_var);
1005 for (j = 0; j < n; ++j)
1006 isl_seq_addmul(t->row[i],
1007 t_node->sched->row[i][1 + n_param + j],
1008 node->sched->row[first + j],
1009 1 + n_param + n_var);
1011 return t;
1014 /* Apply the cluster schedule in "t_node" to the current band
1015 * schedule of the nodes in "graph".
1017 * In particular, replace the rows starting at band_start
1018 * by the result of applying the cluster schedule in "t_node"
1019 * to the original rows.
1021 * The coincidence of the schedule is determined by the coincidence
1022 * of the cluster schedule.
1024 static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
1025 struct isl_sched_node *t_node)
1027 int i, j;
1028 isl_size n_new;
1029 int start, n;
1031 start = graph->band_start;
1032 n = graph->n_total_row - start;
1034 n_new = isl_mat_rows(t_node->sched);
1035 if (n_new < 0)
1036 return isl_stat_error;
1037 for (i = 0; i < graph->n; ++i) {
1038 struct isl_sched_node *node = &graph->node[i];
1039 isl_mat *t;
1041 t = node_transformation(ctx, t_node, node, start, n);
1042 node->sched = isl_mat_drop_rows(node->sched, start, n);
1043 node->sched = isl_mat_concat(node->sched, t);
1044 node->sched_map = isl_map_free(node->sched_map);
1045 if (!node->sched)
1046 return isl_stat_error;
1047 for (j = 0; j < n_new; ++j)
1048 node->coincident[start + j] = t_node->coincident[j];
1050 graph->n_total_row -= n;
1051 graph->n_row -= n;
1052 graph->n_total_row += n_new;
1053 graph->n_row += n_new;
1055 return isl_stat_ok;
1058 /* Merge the clusters marked for merging in "c" into a single
1059 * cluster using the cluster schedule in the current band of "merge_graph".
1060 * The representative SCC for the new cluster is the SCC with
1061 * the smallest index.
1063 * The current band schedule of each SCC in the new cluster is obtained
1064 * by applying the schedule of the corresponding original cluster
1065 * to the original band schedule.
1066 * All SCCs in the new cluster have the same number of schedule rows.
1068 static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
1069 struct isl_sched_graph *merge_graph)
1071 int i;
1072 int cluster = -1;
1073 isl_space *space;
1075 for (i = 0; i < c->n; ++i) {
1076 struct isl_sched_node *node;
1078 if (!c->scc_in_merge[i])
1079 continue;
1080 if (cluster < 0)
1081 cluster = i;
1082 space = cluster_space(&c->scc[i], c->scc_cluster[i]);
1083 node = isl_sched_graph_find_node(ctx, merge_graph, space);
1084 isl_space_free(space);
1085 if (!node)
1086 return isl_stat_error;
1087 if (!isl_sched_graph_is_node(merge_graph, node))
1088 isl_die(ctx, isl_error_internal,
1089 "unable to find cluster",
1090 return isl_stat_error);
1091 if (transform(ctx, &c->scc[i], node) < 0)
1092 return isl_stat_error;
1093 c->scc_cluster[i] = cluster;
1096 return isl_stat_ok;
1099 /* Try and merge the clusters of SCCs marked in c->scc_in_merge
1100 * by scheduling the current cluster bands with respect to each other.
1102 * Construct a dependence graph with a space for each cluster and
1103 * with the coordinates of each space corresponding to the schedule
1104 * dimensions of the current band of that cluster.
1105 * Construct a cluster schedule in this cluster dependence graph and
1106 * apply it to the current cluster bands if it is applicable
1107 * according to ok_to_merge.
1109 * If the number of remaining schedule dimensions in a cluster
1110 * with a non-maximal current schedule dimension is greater than
1111 * the number of remaining schedule dimensions in clusters
1112 * with a maximal current schedule dimension, then restrict
1113 * the number of rows to be computed in the cluster schedule
1114 * to the minimal such non-maximal current schedule dimension.
1115 * Do this by adjusting merge_graph.maxvar.
1117 * Return isl_bool_true if the clusters have effectively been merged
1118 * into a single cluster.
1120 * Note that since the standard scheduling algorithm minimizes the maximal
1121 * distance over proximity constraints, the proximity constraints between
1122 * the merged clusters may not be optimized any further than what is
1123 * sufficient to bring the distances within the limits of the internal
1124 * proximity constraints inside the individual clusters.
1125 * It may therefore make sense to perform an additional translation step
1126 * to bring the clusters closer to each other, while maintaining
1127 * the linear part of the merging schedule found using the standard
1128 * scheduling algorithm.
1130 static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
1131 struct isl_clustering *c)
1133 struct isl_sched_graph merge_graph = { 0 };
1134 isl_bool merged;
1136 if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
1137 goto error;
1139 if (isl_sched_graph_compute_maxvar(&merge_graph) < 0)
1140 goto error;
1141 if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
1142 goto error;
1143 if (isl_schedule_node_compute_wcc_band(ctx, &merge_graph) < 0)
1144 goto error;
1145 merged = ok_to_merge(ctx, graph, c, &merge_graph);
1146 if (merged && merge(ctx, c, &merge_graph) < 0)
1147 goto error;
1149 isl_sched_graph_free(ctx, &merge_graph);
1150 return merged;
1151 error:
1152 isl_sched_graph_free(ctx, &merge_graph);
1153 return isl_bool_error;
1156 /* Is there any edge marked "no_merge" between two SCCs that are
1157 * about to be merged (i.e., that are set in "scc_in_merge")?
1158 * "merge_edge" is the proximity edge along which the clusters of SCCs
1159 * are going to be merged.
1161 * If there is any edge between two SCCs with a negative weight,
1162 * while the weight of "merge_edge" is non-negative, then this
1163 * means that the edge was postponed. "merge_edge" should then
1164 * also be postponed since merging along the edge with negative weight should
1165 * be postponed until all edges with non-negative weight have been tried.
1166 * Replace the weight of "merge_edge" by a negative weight as well and
1167 * tell the caller not to attempt a merge.
1169 static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
1170 struct isl_sched_edge *merge_edge)
1172 int i;
1174 for (i = 0; i < graph->n_edge; ++i) {
1175 struct isl_sched_edge *edge = &graph->edge[i];
1177 if (!scc_in_merge[edge->src->scc])
1178 continue;
1179 if (!scc_in_merge[edge->dst->scc])
1180 continue;
1181 if (edge->no_merge)
1182 return 1;
1183 if (merge_edge->weight >= 0 && edge->weight < 0) {
1184 merge_edge->weight -= graph->max_weight + 1;
1185 return 1;
1189 return 0;
1192 /* Merge the two clusters in "c" connected by the edge in "graph"
1193 * with index "edge" into a single cluster.
1194 * If it turns out to be impossible to merge these two clusters,
1195 * then mark the edge as "no_merge" such that it will not be
1196 * considered again.
1198 * First mark all SCCs that need to be merged. This includes the SCCs
1199 * in the two clusters, but it may also include the SCCs
1200 * of intermediate clusters.
1201 * If there is already a no_merge edge between any pair of such SCCs,
1202 * then simply mark the current edge as no_merge as well.
1203 * Likewise, if any of those edges was postponed by has_bounded_distances,
1204 * then postpone the current edge as well.
1205 * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
1206 * if the clusters did not end up getting merged, unless the non-merge
1207 * is due to the fact that the edge was postponed. This postponement
1208 * can be recognized by a change in weight (from non-negative to negative).
1210 static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
1211 struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
1213 isl_bool merged;
1214 int edge_weight = graph->edge[edge].weight;
1216 if (mark_merge_sccs(ctx, graph, edge, c) < 0)
1217 return isl_stat_error;
1219 if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
1220 merged = isl_bool_false;
1221 else
1222 merged = try_merge(ctx, graph, c);
1223 if (merged < 0)
1224 return isl_stat_error;
1225 if (!merged && edge_weight == graph->edge[edge].weight)
1226 graph->edge[edge].no_merge = 1;
1228 return isl_stat_ok;
1231 /* Does "node" belong to the cluster identified by "cluster"?
1233 static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
1235 return node->cluster == cluster;
1238 /* Does "edge" connect two nodes belonging to the cluster
1239 * identified by "cluster"?
1241 static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
1243 return edge->src->cluster == cluster && edge->dst->cluster == cluster;
1246 /* Swap the schedule of "node1" and "node2".
1247 * Both nodes have been derived from the same node in a common parent graph.
1248 * Since the "coincident" field is shared with that node
1249 * in the parent graph, there is no need to also swap this field.
1251 static void swap_sched(struct isl_sched_node *node1,
1252 struct isl_sched_node *node2)
1254 isl_mat *sched;
1255 isl_map *sched_map;
1257 sched = node1->sched;
1258 node1->sched = node2->sched;
1259 node2->sched = sched;
1261 sched_map = node1->sched_map;
1262 node1->sched_map = node2->sched_map;
1263 node2->sched_map = sched_map;
1266 /* Copy the current band schedule from the SCCs that form the cluster
1267 * with index "pos" to the actual cluster at position "pos".
1268 * By construction, the index of the first SCC that belongs to the cluster
1269 * is also "pos".
1271 * The order of the nodes inside both the SCCs and the cluster
1272 * is assumed to be same as the order in the original "graph".
1274 * Since the SCC graphs will no longer be used after this function,
1275 * the schedules are actually swapped rather than copied.
1277 static isl_stat copy_partial(struct isl_sched_graph *graph,
1278 struct isl_clustering *c, int pos)
1280 int i, j;
1282 c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
1283 c->cluster[pos].n_row = c->scc[pos].n_row;
1284 c->cluster[pos].maxvar = c->scc[pos].maxvar;
1285 j = 0;
1286 for (i = 0; i < graph->n; ++i) {
1287 int k;
1288 int s;
1290 if (graph->node[i].cluster != pos)
1291 continue;
1292 s = graph->node[i].scc;
1293 k = c->scc_node[s]++;
1294 swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
1295 if (c->scc[s].maxvar > c->cluster[pos].maxvar)
1296 c->cluster[pos].maxvar = c->scc[s].maxvar;
1297 ++j;
1300 return isl_stat_ok;
1303 /* Is there a (conditional) validity dependence from node[j] to node[i],
1304 * forcing node[i] to follow node[j] or do the nodes belong to the same
1305 * cluster?
1307 static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
1309 struct isl_sched_graph *graph = user;
1311 if (graph->node[i].cluster == graph->node[j].cluster)
1312 return isl_bool_true;
1313 return isl_sched_graph_has_validity_edge(graph, &graph->node[j],
1314 &graph->node[i]);
1317 /* Extract the merged clusters of SCCs in "graph", sort them, and
1318 * store them in c->clusters. Update c->scc_cluster accordingly.
1320 * First keep track of the cluster containing the SCC to which a node
1321 * belongs in the node itself.
1322 * Then extract the clusters into c->clusters, copying the current
1323 * band schedule from the SCCs that belong to the cluster.
1324 * Do this only once per cluster.
1326 * Finally, topologically sort the clusters and update c->scc_cluster
1327 * to match the new scc numbering. While the SCCs were originally
1328 * sorted already, some SCCs that depend on some other SCCs may
1329 * have been merged with SCCs that appear before these other SCCs.
1330 * A reordering may therefore be required.
1332 static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
1333 struct isl_clustering *c)
1335 int i;
1337 for (i = 0; i < graph->n; ++i)
1338 graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
1340 for (i = 0; i < graph->scc; ++i) {
1341 if (c->scc_cluster[i] != i)
1342 continue;
1343 if (isl_sched_graph_extract_sub_graph(ctx, graph,
1344 &node_cluster_exactly,
1345 &edge_cluster_exactly, i, &c->cluster[i]) < 0)
1346 return isl_stat_error;
1347 c->cluster[i].src_scc = -1;
1348 c->cluster[i].dst_scc = -1;
1349 if (copy_partial(graph, c, i) < 0)
1350 return isl_stat_error;
1353 if (isl_sched_graph_detect_ccs(ctx, graph,
1354 &node_follows_strong_or_same_cluster) < 0)
1355 return isl_stat_error;
1356 for (i = 0; i < graph->n; ++i)
1357 c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
1359 return isl_stat_ok;
1362 /* Compute weights on the proximity edges of "graph" that can
1363 * be used by find_proximity to find the most appropriate
1364 * proximity edge to use to merge two clusters in "c".
1365 * The weights are also used by has_bounded_distances to determine
1366 * whether the merge should be allowed.
1367 * Store the maximum of the computed weights in graph->max_weight.
1369 * The computed weight is a measure for the number of remaining schedule
1370 * dimensions that can still be completely aligned.
1371 * In particular, compute the number of equalities between
1372 * input dimensions and output dimensions in the proximity constraints.
1373 * The directions that are already handled by outer schedule bands
1374 * are projected out prior to determining this number.
1376 * Edges that will never be considered by find_proximity are ignored.
1378 static isl_stat compute_weights(struct isl_sched_graph *graph,
1379 struct isl_clustering *c)
1381 int i;
1383 graph->max_weight = 0;
1385 for (i = 0; i < graph->n_edge; ++i) {
1386 struct isl_sched_edge *edge = &graph->edge[i];
1387 struct isl_sched_node *src = edge->src;
1388 struct isl_sched_node *dst = edge->dst;
1389 isl_basic_map *hull;
1390 isl_bool prox;
1391 isl_size n_in, n_out, n;
1393 prox = is_non_empty_proximity(edge);
1394 if (prox < 0)
1395 return isl_stat_error;
1396 if (!prox)
1397 continue;
1398 if (bad_cluster(&c->scc[edge->src->scc]) ||
1399 bad_cluster(&c->scc[edge->dst->scc]))
1400 continue;
1401 if (c->scc_cluster[edge->dst->scc] ==
1402 c->scc_cluster[edge->src->scc])
1403 continue;
1405 hull = isl_map_affine_hull(isl_map_copy(edge->map));
1406 hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
1407 isl_mat_copy(src->vmap));
1408 hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
1409 isl_mat_copy(dst->vmap));
1410 hull = isl_basic_map_project_out(hull,
1411 isl_dim_in, 0, src->rank);
1412 hull = isl_basic_map_project_out(hull,
1413 isl_dim_out, 0, dst->rank);
1414 hull = isl_basic_map_remove_divs(hull);
1415 n_in = isl_basic_map_dim(hull, isl_dim_in);
1416 n_out = isl_basic_map_dim(hull, isl_dim_out);
1417 if (n_in < 0 || n_out < 0)
1418 hull = isl_basic_map_free(hull);
1419 hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
1420 isl_dim_in, 0, n_in);
1421 hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
1422 isl_dim_out, 0, n_out);
1423 n = isl_basic_map_n_equality(hull);
1424 isl_basic_map_free(hull);
1425 if (n < 0)
1426 return isl_stat_error;
1427 edge->weight = n;
1429 if (edge->weight > graph->max_weight)
1430 graph->max_weight = edge->weight;
1433 return isl_stat_ok;
1436 /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and
1437 * update "node" to arrange for them to be executed in an order
1438 * possibly involving set nodes that generalizes the topological order
1439 * determined by the scc fields of the nodes in "graph".
1441 * Note that at this stage, there are graph->scc clusters and
1442 * their positions in c->cluster are determined by the values
1443 * of c->scc_cluster.
1445 * Construct an isl_scc_graph and perform the decomposition
1446 * using this graph.
1448 static __isl_give isl_schedule_node *finish_bands_decompose(
1449 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
1450 struct isl_clustering *c)
1452 isl_ctx *ctx;
1453 struct isl_scc_graph *scc_graph;
1455 ctx = isl_schedule_node_get_ctx(node);
1457 scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c);
1458 node = isl_scc_graph_decompose(scc_graph, node);
1459 isl_scc_graph_free(scc_graph);
1461 return node;
1464 /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c"
1465 * in their topological order. This order is determined by the scc
1466 * fields of the nodes in "graph".
1467 * Combine the results in a sequence expressing the topological order.
1469 * If there is only one cluster left, then there is no need to introduce
1470 * a sequence node. Also, in this case, the cluster necessarily contains
1471 * the SCC at position 0 in the original graph and is therefore also
1472 * stored in the first cluster of "c".
1474 * If there are more than two clusters left, then some subsets of the clusters
1475 * may still be independent of each other. These could then still
1476 * be reordered with respect to each other. Call finish_bands_decompose
1477 * to try and construct an ordering involving set and sequence nodes
1478 * that generalizes the topological order.
1479 * Note that at the outermost level there can be no independent components
1480 * because isl_schedule_node_compute_wcc_clustering is called
1481 * on a (weakly) connected component.
1483 static __isl_give isl_schedule_node *finish_bands_clustering(
1484 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
1485 struct isl_clustering *c)
1487 int i;
1488 isl_ctx *ctx;
1489 isl_union_set_list *filters;
1491 if (graph->scc == 1)
1492 return isl_schedule_node_compute_finish_band(node,
1493 &c->cluster[0], 0);
1494 if (graph->scc > 2)
1495 return finish_bands_decompose(node, graph, c);
1497 ctx = isl_schedule_node_get_ctx(node);
1499 filters = isl_sched_graph_extract_sccs(ctx, graph);
1500 node = isl_schedule_node_insert_sequence(node, filters);
1502 for (i = 0; i < graph->scc; ++i) {
1503 int j = c->scc_cluster[i];
1504 node = isl_schedule_node_grandchild(node, i, 0);
1505 node = isl_schedule_node_compute_finish_band(node,
1506 &c->cluster[j], 0);
1507 node = isl_schedule_node_grandparent(node);
1510 return node;
1513 /* Compute a schedule for a connected dependence graph by first considering
1514 * each strongly connected component (SCC) in the graph separately and then
1515 * incrementally combining them into clusters.
1516 * Return the updated schedule node.
1518 * Initially, each cluster consists of a single SCC, each with its
1519 * own band schedule. The algorithm then tries to merge pairs
1520 * of clusters along a proximity edge until no more suitable
1521 * proximity edges can be found. During this merging, the schedule
1522 * is maintained in the individual SCCs.
1523 * After the merging is completed, the full resulting clusters
1524 * are extracted and in finish_bands_clustering,
1525 * isl_schedule_node_compute_finish_band is called on each of them to integrate
1526 * the band into "node" and to continue the computation.
1528 * compute_weights initializes the weights that are used by find_proximity.
1530 __isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering(
1531 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
1533 isl_ctx *ctx;
1534 struct isl_clustering c;
1535 int i;
1537 ctx = isl_schedule_node_get_ctx(node);
1539 if (clustering_init(ctx, &c, graph) < 0)
1540 goto error;
1542 if (compute_weights(graph, &c) < 0)
1543 goto error;
1545 for (;;) {
1546 i = find_proximity(graph, &c);
1547 if (i < 0)
1548 goto error;
1549 if (i >= graph->n_edge)
1550 break;
1551 if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
1552 goto error;
1555 if (extract_clusters(ctx, graph, &c) < 0)
1556 goto error;
1558 node = finish_bands_clustering(node, graph, &c);
1560 clustering_free(ctx, &c);
1561 return node;
1562 error:
1563 clustering_free(ctx, &c);
1564 return isl_schedule_node_free(node);