/* Data references and dependences detectors.
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to a same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |   chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.

   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   |   3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.  */
#define INCLUDE_ALGORITHM
#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-affine.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"
static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}
/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}
/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
	return true;
    }
  return false;
}
/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}
/* Unified dump into FILE all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}
/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}
/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}
/* Dumps the affine function described by FN to the file OUTF.  */

static void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}
/* Dumps the conflict function CF to the file OUTF.  */

static void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
	{
	  if (i != 0)
	    fprintf (outf, " ");
	  fprintf (outf, "[");
	  dump_affine_function (outf, cf->fns[i]);
	  fprintf (outf, "]");
	}
    }
}
/* Dump function for a SUBSCRIPT structure.  */

static void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n  (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}
/* Print the classic direction vector DIRV to OUTF.  */

static void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, "    +");
	  break;
	case dir_negative:
	  fprintf (outf, "    -");
	  break;
	case dir_equal:
	  fprintf (outf, "    =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, "   +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, "   +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, "   -=");
	  break;
	case dir_star:
	  fprintf (outf, "    *");
	  break;
	default:
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}
/* Print a vector of direction vectors.  */

static void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
		   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}
/* Print out a vector VEC of length N to OUTFILE.  */

void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}
/* Print a vector of distance vectors.  */

static void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
		    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, "    (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, "    (nil)\n");
	}
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, "  access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, "  access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, "  distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, "  direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
/* Dump into FILE all the dependence relations from DDRS.  */

void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}
DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}
/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations, and VECT_SIZE is the size of the
   dependence vectors, or in other words the number of loops in the
   considered nest.  */

static void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	for (lambda_vector v : DDR_DIST_VECTS (ddr))
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	for (lambda_vector v : DDR_DIR_VECTS (ddr))
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}
/* Dumps the data dependence relations DDRS in FILE.  */

static void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
  fprintf (file, "\n\n");
}
DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */
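/* A worked instance of the DELTA computation above (an expository
   sketch, not from the original sources): for an 8-bit unsigned TYPE
   with OP0 == 200, OP1 == 100 and CODE == PLUS_EXPR, OP0 CODE OP1
   wraps to 44 while (sizetype) OP0 CODE (sizetype) OP1 is 300, so
   DELTA == 300 - 44 == 256, i.e. exactly one overflow of TYPE, and
   256 is subtracted from the constant in *OFF.  */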
static bool
compute_distributive_range (tree type, irange &op0_range,
			    tree_code code, irange &op1_range,
			    tree *off, irange *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
	result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  int_range_max wide_range;
  range_op_handler op (code);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1
      || wide_range.varying_p () || wide_range.undefined_p ())
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}
/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, irange &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
	      && INTEGRAL_TYPE_P (from_type)
	      && !TYPE_OVERFLOW_TRAPS (to_type)
	      && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
			    TYPE_SIGN (to_type));
}
static void
split_constant_offset (tree type, tree *var, tree *off,
		       irange *result_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);
/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE and
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype and
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */
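/* For example (an expository sketch, not from the original sources):
   for OP0 == i_1, CODE == PLUS_EXPR, OP1 == 4 and an integral TYPE,
   the split yields *VAR == (sizetype) i_1 and *OFF == 4: the constant
   summand is peeled off into the offset and only the variable part
   remains in *VAR.  */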
static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, irange *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  int_range_max op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  if (TREE_CODE (op0) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
    return false;
  if (op1
      && TREE_CODE (op1) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	{
	  wide_int w = wi::to_wide (op0);
	  result_range->set (TREE_TYPE (op0), w, w);
	}
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }

    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP and
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype and
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */
static void
split_constant_offset (tree exp, tree *var, tree *off, irange *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      exp_range->set_varying (type);
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (vr, exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  tree vr_min, vr_max;
	  value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
	  wide_int var_min = wi::to_wide (vr_min);
	  wide_int var_max = wi::to_wide (vr_max);
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    exp_range->set (type, var_min, var_max);
	}
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   exp_range, cache, limit))
	return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}
/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}
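/* For instance (an expository sketch, not from the original sources):
   for a pointer expression EXP == p_1 p+ 16 this returns *VAR == p_1
   and *OFF == 16, and for EXP == &a[i_2 + 7] with int a[] the constant
   part becomes *OFF == 28 while the variable i_2 term stays folded
   inside *VAR.  */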
/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the integer.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */
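/* As an illustration (an expository sketch, not from the original
   sources): for the reference a[i] in a loop where the int array a is
   indexed by an induction variable i running from 0 in steps of 1,
   loop analysis would roughly yield base_address == &a, offset == 0,
   init == 0 and step == 4, while BB analysis of the same reference
   folds the variable i * 4 term into the offset component and always
   reports a zero step.  */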
opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    {
      if (may_be_nonaddressable_p (base))
	return opt_result::failure_at (stmt,
				       "failed: base not addressable.\n");
      base = build_fold_addr_expr (base);
    }

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}
/* Returns whether BASE can have a access_fn_component_p with BASE
   as base.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}
/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */
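/* For example (an expository sketch, not from the original sources):
   for a[i][j].f1 analyzed in a nest over i and j, the recorded base
   object is a and the access functions include one chrec per array
   dimension, e.g. {i_0, +, 1}_1 and {j_0, +, 1}_2, plus the constant
   field offset of f1 contributed by the COMPONENT_REF, which is what
   lets a[i].f1 and a[i].f2 be disambiguated.  */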
static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	      (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}
/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}
/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */
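/* A typical use (an expository sketch, not from the original sources):
   a pass that found a load REF in STMT inside LOOP might call

     data_reference *dr
       = create_data_ref (loop_preheader_edge (loop), loop, ref, stmt,
			  true, false);

   and release it with free_data_ref (dr) when it is done.  */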
struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
/* A helper function computes order between two tree expressions T1 and T2.
   This is used in comparator functions sorting objects based on the order
   of tree expressions.  The function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  enum tree_code_class tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
      break;
    }

  return 0;
}
/* Return TRUE if it's possible to resolve data dependence DDR by runtime
   alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n",
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported when"
				   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported for"
				   " outer loop.\n");

  /* FORNOW: We don't support handling different address spaces.  */
  if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
      != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check between different "
				   "address spaces not supported.\n");

  return opt_result::success ();
}
/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same one so that we will consider to combine the
   aliasing checks of those two pairs of data dependent data
   refs.  */

static bool
operator == (const dr_with_seg_len& d1,
	     const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
			   DR_BASE_ADDRESS (d2.dr), 0)
	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
	  && known_eq (d1.access_size, d2.access_size)
	  && d1.align == d2.align);
}
/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t * pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t * pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
     if a and c have the same basic address and step, and b and d have the same
     address and step.  Therefore, if any a&c or b&d don't have the same address
     and step, we don't care the order of those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
					 DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
					 DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
					 DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
					 DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
					 DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
					 DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
					 DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
					 DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}
/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
	       DR_REF (alias_pair->first.dr),
	       DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
	       alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
			alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
	       alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the aliasing checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */
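/* A concrete instance of the merge (an expository sketch, not from
   the original sources): with load_ptr_0 == p, load_ptr_1 == p + 4,
   load segment lengths of 16 bytes and a store segment length of at
   least 20 bytes, the condition above holds (4 - 16 < 20), so the two
   checks against p and p + 4 collapse into one check against the
   combined segment starting at p.  */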
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
	{
	  std::swap (alias_pair->first, alias_pair->second);
	  alias_pair->flags |= DR_ALIAS_SWAPPED;
	}
      else
	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  continue;
	}

      /* Assume that we won't be able to merge the pairs, then correct
	 if we do.  */
      last += 1;
      if (last != i)
	(*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (init_a1, init_a2))
	    continue;

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  poly_uint64 new_seg_len = 0;
	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
						 dr_a2->seg_len, 0);
	  if (new_seg_len_p)
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
	      else
		continue;
	    }
	  /* At this point we're committed to merging the refs.  */

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
	    }

	  /* The DR_Bs are equal, so only the DR_As can introduce
	     mixed steps.  */
	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

	  if (new_seg_len_p)
	    {
	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  last -= 1;
	}
    }
  alias_pairs->truncate (last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
	std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
	alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
	dump_alias_pair (alias_pair, "  ");
    }
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
   to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  */

static bool
create_ifn_alias_checks (tree *cond_expr,
			 const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) we have a known RAW, WAR or WAW dependence;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
    }
  return true;
}
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create an expression based on the index rather than the address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note that the evolution step of the index needs to be considered in the
   comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, aliasing is checked if the memory segment accessed
     by DR_A in some iterations intersects with the memory segment accessed by
     DR_B in the same number of iterations.
     Note the segment length is a linear function of the number of iterations,
     with DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* The two indices must be the same if they are not SCEVs, or not SCEVs
	 wrt the current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned) loop->num
	  || CHREC_VARIABLE (access2) != (unsigned) loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, aliasing could be checked against the loop's control IV, but we
     would need to prove a linear mapping between the control IV and the
     reference index.  Although that should be true, we check against the
     (array) index of the data reference instead.  Like segment length, index
     length is a linear function of the number of iterations with index_step
     as the coefficient, i.e., niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	  <-- idx_len -->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<-- idx_len -->
	  |
	  min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

	low_offsetN = +ve step ? 0 : -idx_lenN;
	high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	    limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		  + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
	  |
	  base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

	low_offset_a = +ve step ? 0 : seg_len_a - step
	high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
	  |
	  base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
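/* Added illustration (hypothetical values, not from the original pass).
   For a reference with base address BASE, a positive step, seg_len = 16,
   access_size = 4 and ALIGN = 4, the code above computes:

     min_reach = 0,  max_reach = 16 + (4 - 4) = 16

   so *SEG_MIN_OUT = BASE and *SEG_MAX_OUT = BASE + 16, giving the
   inclusive range [BASE, BASE + 16] required by the ALIGN != 0
   contract in the comment above.  */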
/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate a
   test that the two accesses do not overlap:

     end_a <= start_b || end_b <= start_a.  */

static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     are equivalent.  This is just an optimization heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of another.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = std::min (dr_a.align, dr_b.align);
      min_align = std::min (min_align, known_alignment (dr_a.access_size));
      min_align = std::min (min_align, known_alignment (dr_b.access_size));
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);

  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		   fold_build2 (cmp_code, boolean_type_node,
				seg_a_max, seg_b_min),
		   fold_build2 (cmp_code, boolean_type_node,
				seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}
/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.  */

void
create_runtime_alias_checks (class loop *loop,
			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree *cond_expr)
{
  tree part_cond_expr;

  fold_defer_overflow_warnings ();
  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
    {
      gcc_assert (alias_pair.flags);
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE,
		     "create runtime check for data references %T and %T\n",
		     DR_REF (alias_pair.first.dr),
		     DR_REF (alias_pair.second.dr));

      /* Create the condition expression for each pair of data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  fold_undefer_and_ignore_overflow_warnings ();
}
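/* Added illustration (for exposition only).  For a loop such as:

     void
     f (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] + 1;
     }

   the conjunction built above becomes the versioning condition,
   conceptually:

     if (a + n <= b || b + n <= a)
       ... vectorized loop ...
     else
       ... scalar loop ...  */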
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
   expressions.  */
static bool
dr_equal_offsets_p1 (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
			     TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
			     TREE_OPERAND (offset2, 1));

  return res;
}

/* Check if DRA and DRB have equal offsets.  */
bool
dr_equal_offsets_p (struct data_reference *dra,
		    struct data_reference *drb)
{
  tree offset1, offset2;

  offset1 = DR_OFFSET (dra);
  offset2 = DR_OFFSET (drb);

  return dr_equal_offsets_p1 (offset1, offset2);
}
/* Returns true if FNA == FNB.  */

static bool
affine_function_equal_p (affine_fn fna, affine_fn fnb)
{
  unsigned i, n = fna.length ();

  if (n != fnb.length ())
    return false;

  for (i = 0; i < n; i++)
    if (!operand_equal_p (fna[i], fnb[i], 0))
      return false;

  return true;
}

/* If all the functions in CF are the same, returns one of them,
   otherwise returns NULL.  */

static affine_fn
common_affine_function (conflict_function *cf)
{
  unsigned i;
  affine_fn comm;

  if (!CF_NONTRIVIAL_P (cf))
    return affine_fn ();

  comm = cf->fns[0];

  for (i = 1; i < cf->n; i++)
    if (!affine_function_equal_p (comm, cf->fns[i]))
      return affine_fn ();

  return comm;
}

/* Returns the base of the affine function FN.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}

/* Returns true if FN is a constant.  */

static bool
affine_function_constant_p (affine_fn fn)
{
  unsigned i;
  tree coef;

  for (i = 1; fn.iterate (i, &coef); i++)
    if (!integer_zerop (coef))
      return false;

  return true;
}

/* Returns true if FN is the zero constant function.  */

static bool
affine_function_zero_p (affine_fn fn)
{
  return (integer_zerop (affine_function_base (fn))
	  && affine_function_constant_p (fn));
}

/* Returns a signed integer type with the largest precision from TA
   and TB.  */

static tree
signed_type_for_types (tree ta, tree tb)
{
  if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
    return signed_type_for (ta);
  else
    return signed_type_for (tb);
}

/* Applies operation OP on affine functions FNA and FNB, and returns the
   result.  */

static affine_fn
affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
{
  unsigned i, n, m;
  affine_fn ret;
  tree coef;

  if (fnb.length () > fna.length ())
    {
      n = fna.length ();
      m = fnb.length ();
    }
  else
    {
      n = fnb.length ();
      m = fna.length ();
    }

  ret.create (m);
  for (i = 0; i < n; i++)
    {
      tree type = signed_type_for_types (TREE_TYPE (fna[i]),
					 TREE_TYPE (fnb[i]));
      ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
    }

  for (; fna.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 coef, integer_zero_node));
  for (; fnb.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 integer_zero_node, coef));

  return ret;
}

/* Returns the sum of affine functions FNA and FNB.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}

/* Returns the difference of affine functions FNA and FNB.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}

/* Frees affine function FN.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
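/* Added illustration (for exposition only).  Affine functions are stored
   as coefficient vectors whose element 0 is the base, so {1, 2}
   represents 1 + 2 * x_1 and {3, 0, 5} represents 3 + 5 * x_2.
   affine_fn_plus pads the shorter vector with zeros, giving:

     {1, 2} + {3, 0, 5} = {4, 2, 5}

   i.e. 4 + 2 * x_1 + 5 * x_2.  */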
/* Determine for each subscript in the data dependence relation DDR
   the distance.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  diff = affine_fn_minus (fn_a, fn_b);

	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
/* Returns the conflict function for "unknown".  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}

/* Returns the conflict function for "independent".  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
/* Returns true if the address of OBJ is invariant in LOOP.  */

static bool
object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
{
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
	{
	  for (int i = 1; i < 4; ++i)
	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
							loop->num))
	      return false;
	}
      else if (TREE_CODE (obj) == COMPONENT_REF)
	{
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num))
	    return false;
	}
      obj = TREE_OPERAND (obj, 0);
    }

  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
						  loop->num);
}
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
      tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));

      if (DR_BASE_ADDRESS (a)
	  && DR_BASE_ADDRESS (b)
	  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
	  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
	  && poly_int_tree_p (tree_size_a)
	  && poly_int_tree_p (tree_size_b)
	  && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
				      wi::to_poly_widest (tree_size_a),
				      wi::to_poly_widest (DR_INIT (b)),
				      wi::to_poly_widest (tree_size_b)))
	{
	  gcc_assert (integer_zerop (DR_STEP (a))
		      && integer_zerop (DR_STEP (b)));
	  return false;
	}

      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }
  /* If dr_analyze_innermost failed to handle a component we are
     possibly left with a non-base in which case we didn't analyze
     a possible evolution of the base when analyzing a loop.  */
  else if (loop_nest
	   && (handled_component_p (addr_a) || handled_component_p (addr_b)))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
      else if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       build_fold_addr_expr (addr_b));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
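/* Added illustration (for exposition only).  With strict aliasing
   enabled, a write through "int *p" and a read through "float *q" have
   non-conflicting alias sets, so the TBAA checks above can let
   dr_may_alias_p return false for the pair even when the points-to
   information alone cannot disambiguate the two pointers.  */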
/* REF_A and REF_B both satisfy access_fn_component_p.  Return true
   if it is meaningful to compare their associated access functions
   when checking for dependencies.  */

static bool
access_fn_components_comparable_p (tree ref_a, tree ref_b)
{
  /* Allow pairs of component refs from the following sets:

       { REALPART_EXPR, IMAGPART_EXPR }
       { COMPONENT_REF }
       { ARRAY_REF }.  */
  tree_code code_a = TREE_CODE (ref_a);
  tree_code code_b = TREE_CODE (ref_b);
  if (code_a == IMAGPART_EXPR)
    code_a = REALPART_EXPR;
  if (code_b == IMAGPART_EXPR)
    code_b = REALPART_EXPR;
  if (code_a != code_b)
    return false;

  if (TREE_CODE (ref_a) == COMPONENT_REF)
    /* ??? We cannot simply use the type of operand #0 of the refs here as
       the Fortran compiler smuggles type punning into COMPONENT_REFs.
       Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
    return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));

  return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
}
/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
   is true when the main indices of A and B were not comparable so we try again
   with alternate indices computed on an indirect reference.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_dependence_relation *res,
				     vec<loop_p> loop_nest,
				     bool use_alt_indices)
{
  struct data_reference *a = DDR_A (res);
  struct data_reference *b = DDR_B (res);
  unsigned int i;

  struct indices *indices_a = &a->indices;
  struct indices *indices_b = &b->indices;
  if (use_alt_indices)
    {
      if (TREE_CODE (DR_REF (a)) != MEM_REF)
	indices_a = &a->alt_indices;
      if (TREE_CODE (DR_REF (b)) != MEM_REF)
	indices_b = &b->alt_indices;
    }
  unsigned int num_dimensions_a = indices_a->access_fns.length ();
  unsigned int num_dimensions_b = indices_b->access_fns.length ();
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

	void
	f (int a[][8], int b[][8])
	{
	  for (int i = 0; i < 8; ++i)
	    a[i * 2][0] = b[i][0];
	}

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (indices_a->unconstrained_base)
    num_dimensions_a -= 1;
  if (indices_b->unconstrained_base)
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

			 0   1   2   3
	A:              .d  .c  .s [i]

			 0   1   2   3   4   5
	B:  __real      .f [i] .e  .s [i] [0]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is again comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* The alternate indices form always has a single dimension
	 with unconstrained base.  */
      gcc_assert (!use_alt_indices);

      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = indices_a->base_object;
  tree base_b = indices_b->base_object;
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && (indices_a->unconstrained_base
			  == indices_b->unconstrained_base)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && (types_compatible_p (TREE_TYPE (base_a),
					      TREE_TYPE (base_b))
			  || (!base_supports_access_fn_components_p (base_a)
			      && !base_supports_access_fn_components_p (base_b)
			      && operand_equal_p
				   (TYPE_SIZE (TREE_TYPE (base_a)),
				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
         b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
         a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (indices_a->unconstrained_base)
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      if (use_alt_indices
	  || (TREE_CODE (DR_REF (a)) == MEM_REF
	      && TREE_CODE (DR_REF (b)) == MEM_REF)
	  || may_be_nonaddressable_p (DR_REF (a))
	  || may_be_nonaddressable_p (DR_REF (b)))
	{
	  /* Fully exhausted possibilities.  */
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      /* Try evaluating both DRs as dereferences of pointers.  */
      if (!a->alt_indices.base_object
	  && TREE_CODE (DR_REF (a)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (a)), 0));
	  dr_analyze_indices (&a->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (a)));
	}
      if (!b->alt_indices.base_object
	  && TREE_CODE (DR_REF (b)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (b)), 0));
	  dr_analyze_indices (&b->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (b)));
	}
      return initialize_data_dependence_relation (res, loop_nest, true);
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
      SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  data_dependence_relation *res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  return initialize_data_dependence_relation (res, loop_nest, false);
}
/* Frees memory used by the conflict function F.  */

static void
free_conflict_function (conflict_function *f)
{
  unsigned i;

  if (CF_NONTRIVIAL_P (f))
    {
      for (i = 0; i < f->n; i++)
	affine_fn_free (f->fns[i]);
    }
  free (f);
}

/* Frees memory used by SUBSCRIPTS.  */

static void
free_subscripts (vec<subscript_p> subscripts)
{
  for (subscript_p s : subscripts)
    {
      free_conflict_function (s->conflicting_iterations_in_a);
      free_conflict_function (s->conflicting_iterations_in_b);
      free (s);
    }
  subscripts.release ();
}

/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}

/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}
/* This section contains the classic Banerjee tests.  */

/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */

static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  return (evolution_function_is_constant_p (chrec_a)
	  && evolution_function_is_constant_p (chrec_b));
}

/* Returns true iff CHREC_A and CHREC_B are dependent on an index
   variable, i.e., if the SIV (Single Index Variable) test is true.  */

static bool
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  if ((evolution_function_is_constant_p (chrec_a)
       && evolution_function_is_univariate_p (chrec_b))
      || (evolution_function_is_constant_p (chrec_b)
	  && evolution_function_is_univariate_p (chrec_a)))
    return true;

  if (evolution_function_is_univariate_p (chrec_a)
      && evolution_function_is_univariate_p (chrec_b))
    {
      switch (TREE_CODE (chrec_a))
	{
	case POLYNOMIAL_CHREC:
	  switch (TREE_CODE (chrec_b))
	    {
	    case POLYNOMIAL_CHREC:
	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
		return false;
	      /* FALLTHRU */

	    default:
	      return true;
	    }

	default:
	  return true;
	}
    }

  return false;
}
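/* Added illustration (for exposition only).  For the subscript pair
   (5, 10) both access functions are constants, so the ZIV test applies.
   For ({0, +, 1}_1, 10) or ({0, +, 1}_1, {3, +, 1}_1) a single loop
   index is involved, so the SIV test applies.  A pair such as
   ({0, +, 1}_1, {0, +, 1}_2) depends on two different loop variables
   and is classified MIV (Multiple Index Variables).  */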
/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (n > 0 && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}

/* Returns constant affine function with value CST.  */

static affine_fn
affine_fn_cst (tree cst)
{
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}

/* Returns affine function with single variable, CST + COEF * x_DIM.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);
  return fn;
}
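/* Added illustration (for exposition only).  affine_fn_cst (c) builds
   the one-element vector {c}, while affine_fn_univar (cst, 2, coef)
   builds {cst, 0, coef}, i.e. the function cst + coef * x_2 with a
   zero coefficient for x_1.  */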
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits into the unsigned int type.  If this is not the
   case, or the bound on the number of iterations of LOOP could not be
   derived, returns chrec_dont_know.  */

static tree
max_stmt_executions_tree (class loop *loop)
{
  widest_int nit;

  if (!max_stmt_executions (loop, &nit))
    return chrec_dont_know;

  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
    return chrec_dont_know;

  return wide_int_to_tree (unsigned_type_node, nit);
}
/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
	return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
	{
	  *value = value0;
	  return true;
	}

      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
	 and the proof consists in showing that the sign never
	 changes during the execution of the loop, from 0 to
	 loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
	return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
	return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
	 iterations by one.  */
      if (after_exit)
	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
	return false;

      *value = value0;
      return value0 == value2;

    case INTEGER_CST:
      switch (tree_int_cst_sgn (chrec))
	{
	case -1:
	  *value = false;
	  break;
	case 1:
	  *value = true;
	  break;
	default:
	  return false;
	}
      return true;

    default:
      return false;
    }
}
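/* Worked example (hypothetical values, added for exposition).  For the
   chrec {-197, +, 2}_1 mentioned above, CHREC_LEFT is negative and
   CHREC_RIGHT is positive, so the early exit does not apply.  If the
   loop is known to run at most 50 latch iterations, the end value is
   -197 + 2 * 50 = -97, which is still negative; the sign therefore
   never changes and the function returns true with *VALUE == false.  */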
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a
			= conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type,
						      difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a
			= conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
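/* For illustration (ours, not from the original sources): for
   CHREC = {{3, +, 2}_1, +, 5}_2 and MULT = 1, the recursion stores the
   steps in column 0 of A, from the innermost evolution outwards:

   | A[INDEX][0]     = 5   (step in loop_2)
   | A[INDEX + 1][0] = 2   (step in loop_1)

   and returns the initial value 3.  With MULT = -1, as used for the
   second access function in analyze_subscript_affine_affine, the steps
   are negated so that both access functions end up on the same side of
   the resulting Diophantine equation.  */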
#define FLOOR_DIV(x,y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }

  else
    {
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
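/* A worked example (ours): for STEP_A = 4 and STEP_B = 6 the gcd is 2,
   so *OVERLAPS_A = {0, +, 3} and *OVERLAPS_B = {0, +, 2} in dimension
   DIM: indeed 4 * (3 * t) = 6 * (2 * t) = 12 * t for every t >= 0.
   With NITER = 100 the last conflict is MIN (100/3, 100/2) = 33.  When
   the steps have opposite signs, only the initial pair of iterations
   can conflict, and the constant zero overlap functions are returned
   by the else branch.  */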
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */

static void
lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
		    int size)
{
  memcpy (vec2, vec1, size * sizeof (*vec1));
}

/* Copy the elements of M x N matrix MAT1 to MAT2.  */

static void
lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
		    int m, int n)
{
  int i;

  for (i = 0; i < m; i++)
    lambda_vector_copy (mat1[i], mat2[i], n);
}

/* Store the N x N identity matrix in MAT.  */

static void
lambda_matrix_id (lambda_matrix mat, int size)
{
  int i, j;

  for (i = 0; i < size; i++)
    for (j = 0; j < size; j++)
      mat[i][j] = (i == j) ? 1 : 0;
}
/* Return the index of the first nonzero element of vector VEC1 between
   START and N.  We must have START <= N.
   Returns N if VEC1 is the zero vector.  */

static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
{
  int j = start;
  while (j < n && vec1[j] == 0)
    j++;
  return j;
}

/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  Returns true on success, false when the
   computation overflows.  */

static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
		       lambda_int const1)
{
  int i;

  if (const1 == 0)
    return true;

  for (i = 0; i < n; i++)
    {
      bool ovf;
      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
      if (ovf)
	return false;
      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
      if (ovf || tem2 == HOST_WIDE_INT_MIN)
	return false;
      mat[r2][i] = tem2;
    }

  return true;
}

/* Multiply vector VEC1 of length SIZE by a constant CONST1,
   and store the result in VEC2.  */

static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
			  int size, lambda_int const1)
{
  int i;

  if (const1 == 0)
    lambda_vector_clear (vec2, size);
  else
    for (i = 0; i < size; i++)
      vec2[i] = const1 * vec1[i];
}

/* Negate vector VEC1 with length SIZE and store it in VEC2.  */

static void
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
		      int size)
{
  lambda_vector_mult_const (vec1, vec2, size, -1);
}

/* Negate row R1 of matrix MAT which has N columns.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  lambda_vector_negate (mat[r1], mat[r1], n);
}

/* Return true if two vectors are equal.  */

static bool
lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
{
  int i;

  for (i = 0; i < size; i++)
    if (vec1[i] != vec2[i])
      return false;
  return true;
}
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.

   Returns true on success, false when one of the row operations
   overflows.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }

  return true;
}
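/* A worked example (ours, not from the original sources): for m = 2,
   n = 1 and A = [[4], [6]], the row operations perform the Euclidean
   algorithm on the column and yield

   | S = [[2], [0]],   U = [[-1, 1], [3, -2]]

   and indeed U.A = S: -1*4 + 1*6 = 2 = gcd (4, 6), and 3*4 - 2*6 = 0.
   U is unimodular (det U = 2 - 3 = -1), so the integer solutions of
   the original system correspond exactly to those of the echelon
   system S.  */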
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, i.e. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter >= 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		      = build_int_cst (integer_type_node,
				       last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
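/* A worked example of the gcd-test above (ours, not from the original
   sources): for chrec_a = {3, +, 8}_1 and chrec_b = {1, +, 12}_1 we
   get gamma = 1 - 3 = -2 and gcd_alpha_beta = gcd (8, 12) = 4.  Since
   4 does not divide -2, the Diophantine equation 8x - 12y = -2 has no
   integer solution and the accesses are independent.  With
   chrec_b = {1, +, 6}_1 instead, gcd (8, 6) = 2 divides -2, and the
   solutions x0 = i0 + i1 * t, y0 = j0 + j1 * t are enumerated from the
   unimodular matrix U computed by the right Hermite decomposition.  */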
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1:

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
	return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  return val % cd == 0;
}
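/* For illustration (ours): for CHREC = {{21, +, 2}_1, +, -2}_2 the
   gcd of the steps is gcd (2, -2) = 2.  With CST = 1 the function
   returns false, since 1 % 2 != 0: no combination of the steps can
   make up a difference of 1, which is how analyze_miv_subscript below
   proves independence for the ssa-chrec-33.c example.  With CST = 4
   it would return true, since 4 % 2 == 0.  */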
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
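/* A note on the dispatch above (ours, following the Allen-Kennedy
   terminology already used by dependence_stats): a subscript pair is
   ZIV (Zero Index Variable) when both access functions are invariant,
   as in A[3] vs. A[5]; SIV when exactly one loop variable appears, as
   in {0, +, 1}_1 vs. 3 or {0, +, 1}_1 vs. {2, +, 1}_1; and MIV when
   several variables are involved, as in {{0, +, 1}_1, +, 2}_2 vs.
   {0, +, 1}_1.  Each class is handled by a dedicated, increasingly
   expensive tester.  */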
/* Helper function for uniquely inserting distance vectors.  */

static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
  for (lambda_vector v : DDR_DIST_VECTS (ddr))
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIST_VECTS (ddr).safe_push (dist_v);
}

/* Helper function for uniquely inserting direction vectors.  */

static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
  for (lambda_vector v : DDR_DIR_VECTS (ddr))
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIR_VECTS (ddr).safe_push (dir_v);
}

/* Add a distance of 1 on all the loops outer than INDEX.  If we
   haven't yet determined a distance for this outer loop, push a new
   distance vector composed of the previous distance, and a distance
   of 1 for this outer loop.  Example:

   | loop_1
   |  loop_2
   |    A (a, a, a) = ...
   |    ... = A (a, a, a)
   |  endloop_2
   | endloop_1

   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   save (0, 1), then we have to save (1, 0).  */

static void
add_outer_distances (struct data_dependence_relation *ddr,
		     lambda_vector dist_v, int index)
{
  /* For each outer loop where init_v is not set, the accesses are
     in dependence of distance 1 in the loop.  */
  while (--index >= 0)
    {
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
      save_v[index] = 1;
      save_dist_v (ddr, save_v);
    }
}
/* Return false when we fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     may have more access functions than loops.  Skip access
	     functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
/* Return true when the DDR contains only invariant access functions
   with respect to loop number LNUM.  */

static bool
invariant_access_functions (const struct data_dependence_relation *ddr,
			    int lnum)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
      return false;

  return true;
}

/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     may have more access functions than loops.  Skip access
	     functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}

static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[0] = 1;
  save_dist_v (ddr, dist_v);
}
/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
   is the case for example when access functions are the same and
   equal to a constant, as in:

   | loop_1
   |   A[3] = ...
   |   ... = A[3]
   | endloop_1

   in which case the distance vectors are (0) and (1).  */

static void
add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
{
  unsigned i, j;

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);

      for (j = 0; j < ca->n; j++)
	if (affine_function_zero_p (ca->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }

      for (j = 0; j < cb->n; j++)
	if (affine_function_zero_p (cb->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }
    }
}

/* Return true when the DDR contains two data references that have the
   same access functions.  */

static inline bool
same_access_functions (const struct data_dependence_relation *ddr)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
			  SUB_ACCESS_FN (sub, 1)))
      return false;

  return true;
}
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when we
   fail to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |  A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  return true;
}
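/* A small example of the result (ours, not from the original sources):
   for

   | for (i = 0; i < 100; i++)
   |   A[i + 2] = A[i];

   the two access functions are {2, +, 1}_1 and {0, +, 1}_1, the
   subscript distance is 2, and the only distance vector saved is (2):
   the element written at iteration i is read again two iterations
   later.  A client such as the vectorizer can deduce from it that at
   most two consecutive iterations may safely execute in parallel.  */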
/* Return the direction for a given distance.
   FIXME: Computing dir this way is suboptimal, since dir can catch
   cases that dist is unable to represent.  */

static inline enum data_dependence_direction
dir_from_dist (int dist)
{
  if (dist > 0)
    return dir_positive;
  else if (dist < 0)
    return dir_negative;
  else
    return dir_equal;
}

/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
	dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  unsigned i;

	  fprintf (dump_file, "(build_classic_dist_vector\n");
	  for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	    {
	      fprintf (dump_file, "  dist_vector = (");
	      print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
				   DDR_NB_LOOPS (ddr));
	      fprintf (dump_file, "  )\n");
	    }
	  fprintf (dump_file, ")\n");
	}
      build_classic_dir_vector (ddr);
    }
}
/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. data references number
   is small enough to be handled.  */

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
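/* For illustration only (ours, not from the original sources): a
   sketch of how a client pass typically drives the entry points above;
   LOOP and LOOP_NEST are assumed to have been set up by the caller,
   and cleanup of the vectors is elided.

   | auto_vec<data_reference_p> datarefs;
   | auto_vec<ddr_p> ddrs;
   | if (find_data_references_in_loop (loop, &datarefs) != chrec_dont_know
   |     && compute_all_dependences (datarefs, &ddrs, loop_nest, false))
   |   for (ddr_p ddr : ddrs)
   |     if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
   |       ; // be conservative: treat the pair as dependent
*/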
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  case IFN_MASK_CALL:
	    {
	      tree orig_fndecl
		= gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
	      if (!orig_fndecl
		  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
		clobbers_memory = true;
	    }
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
	clobbers_memory = false;
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
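/* For illustration (ours): for the gimple assignment

   | A[i_1] = *p_2;

   the function records *p_2 as an unconditional read (OP1) and A[i_1]
   as an unconditional write (OP0), and returns false since a plain
   assignment does not clobber memory.  A call to a non-const,
   non-builtin function instead returns true, after recording only the
   references visible in its LHS and arguments.  */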
/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}
/* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
   reference, returns false, otherwise returns true.  NEST is the outermost
   loop of the loop nest in which the references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
			    ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}
/* Search the data references in BB, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle address
   arithmetic as if they were array accesses, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
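
/* Worked example for the function above (illustrative only): if BASE
   analyzes to the chrec {&x.f, +, 8}_1, the peeling loop records the
   step alignment highest_pow2_factor (8) == 8 and leaves &x.f.
   Dereferencing folds to x.f, and get_inner_reference then reports X
   as the base object together with the byte position of field F; the
   final alignment is the minimum of 8 and the alignment guaranteed by
   that byte position.  */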
/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}
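
/* Illustrative sketch (hypothetical helper): a client that wants ADDR
   aligned to WANT bytes can use the function above to find the object
   whose alignment would have to be raised, and to check that WANT does
   not exceed the achievable maximum.  Only a declaration can actually
   be realigned, hence the DECL_P test.  */

static bool
example_can_align_addr_p (tree addr, unsigned int want)
{
  unsigned int max_alignment;
  tree base = get_base_for_alignment (addr, &max_alignment);
  return DECL_P (base) && want <= max_alignment;
}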
/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
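
/* Illustrative sketch (hypothetical helper): depth of the nest rooted
   at LOOP, or 0 when the nest is not well formed.  On success NEST[0]
   is LOOP itself and NEST.last () the innermost loop, in the order
   used by the classic distance vectors.  */

static unsigned
example_loop_nest_depth (class loop *loop)
{
  auto_vec<loop_p, 3> nest;
  if (!find_loop_nest (loop, &nest))
    return 0;
  return nest.length ();
}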
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
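
/* Illustrative sketch (hypothetical helper, not part of the original
   file): a minimal driver for the function above that asks whether
   LOOP is free of memory dependences, using the cleanup routines
   defined below.  DDR_ARE_DEPENDENT is chrec_known exactly when the
   two references were proved independent.  */

static bool
example_loop_is_dependence_free_p (class loop *loop)
{
  auto_vec<loop_p, 3> loop_nest;
  auto_vec<data_reference_p, 10> datarefs;
  auto_vec<ddr_p, 10> dependence_relations;

  bool ok = compute_data_dependences_for_loop (loop, false, &loop_nest,
					       &datarefs,
					       &dependence_relations);
  if (ok)
    for (ddr_p ddr : dependence_relations)
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  ok = false;
	  break;
	}

  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  return ok;
}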
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}
/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}
/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.undefined_p ())
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  return DR_STEP (dr);
}
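
/* Worked example for the scaling case above (illustrative values
   only): for a step of the form (ssizetype) n * 4, where n is a
   32-bit quantity with known range [0, 100], the factor 4 cannot make
   the product overflow ssizetype, so the test is done on n itself.
   Since the minimum 0 satisfies 0 >= USEFUL_MIN for USEFUL_MIN == 0,
   the caller dr_direction_indicator would receive ssize_int (0)
   instead of the opaque product.  */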
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}
/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}
/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
				   fold_convert (ssizetype, indicator),
				   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}
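
/* Worked example for the predicate above (illustrative): for
   indicator == ssize_int (-1), fold_binary simplifies "-1 < 0" to
   boolean_true_node and the function returns false; for
   indicator == ssize_int (0) it simplifies to boolean_false_node and
   the function returns true; for a symbolic indicator fold_binary
   returns NULL_TREE, so the function conservatively returns false,
   treating the stride as possibly backward.  */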