[AMDGPU] Test codegen'ing True16 additions.
[llvm-project.git] / polly / lib / Transform / DeLICM.cpp
blob51e701346563a1992c3eef3610887835039ec126
1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Undo the effect of Loop Invariant Code Motion (LICM) and
10 // GVN Partial Redundancy Elimination (PRE) on SCoP-level.
12 // Namely, remove register/scalar dependencies by mapping them back to array
13 // elements.
15 //===----------------------------------------------------------------------===//
17 #include "polly/DeLICM.h"
18 #include "polly/LinkAllPasses.h"
19 #include "polly/Options.h"
20 #include "polly/ScopInfo.h"
21 #include "polly/ScopPass.h"
22 #include "polly/Support/GICHelper.h"
23 #include "polly/Support/ISLOStream.h"
24 #include "polly/Support/ISLTools.h"
25 #include "polly/ZoneAlgo.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/InitializePasses.h"
29 #define DEBUG_TYPE "polly-delicm"
31 using namespace polly;
32 using namespace llvm;
34 namespace {
36 cl::opt<int>
37 DelicmMaxOps("polly-delicm-max-ops",
38 cl::desc("Maximum number of isl operations to invest for "
39 "lifetime analysis; 0=no limit"),
40 cl::init(1000000), cl::cat(PollyCategory));
42 cl::opt<bool> DelicmOverapproximateWrites(
43 "polly-delicm-overapproximate-writes",
44 cl::desc(
45 "Do more PHI writes than necessary in order to avoid partial accesses"),
46 cl::init(false), cl::Hidden, cl::cat(PollyCategory));
48 cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes",
49 cl::desc("Allow partial writes"),
50 cl::init(true), cl::Hidden,
51 cl::cat(PollyCategory));
53 cl::opt<bool>
54 DelicmComputeKnown("polly-delicm-compute-known",
55 cl::desc("Compute known content of array elements"),
56 cl::init(true), cl::Hidden, cl::cat(PollyCategory));
58 STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs");
59 STATISTIC(DeLICMOutOfQuota,
60 "Analyses aborted because max_operations was reached");
61 STATISTIC(MappedValueScalars, "Number of mapped Value scalars");
62 STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars");
63 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping");
64 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized");
66 STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM");
67 STATISTIC(NumValueWritesInLoops,
68 "Number of scalar value writes nested in affine loops after DeLICM");
69 STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM");
70 STATISTIC(NumPHIWritesInLoops,
71 "Number of scalar phi writes nested in affine loops after DeLICM");
72 STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM");
73 STATISTIC(NumSingletonWritesInLoops,
74 "Number of singleton writes nested in affine loops after DeLICM");
76 isl::union_map computeReachingOverwrite(isl::union_map Schedule,
77 isl::union_map Writes,
78 bool InclPrevWrite,
79 bool InclOverwrite) {
80 return computeReachingWrite(Schedule, Writes, true, InclPrevWrite,
81 InclOverwrite);
84 /// Compute the next overwrite for a scalar.
85 ///
86 /// @param Schedule { DomainWrite[] -> Scatter[] }
87 /// Schedule of (at least) all writes. Instances not in @p
88 /// Writes are ignored.
89 /// @param Writes { DomainWrite[] }
90 /// The element instances that write to the scalar.
91 /// @param InclPrevWrite Whether to extend the timepoints to include
92 /// the timepoint where the previous write happens.
93 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint
94 /// of the overwrite itself.
95 ///
96 /// @return { Scatter[] -> DomainDef[] }
97 isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule,
98 isl::union_set Writes,
99 bool InclPrevWrite,
100 bool InclOverwrite) {
102 // { DomainWrite[] }
103 auto WritesMap = isl::union_map::from_domain(Writes);
105 // { [Element[] -> Scatter[]] -> DomainWrite[] }
106 auto Result = computeReachingOverwrite(
107 std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite);
109 return Result.domain_factor_range();
112 /// Overload of computeScalarReachingOverwrite, with only one writing statement.
113 /// Consequently, the result consists of only one map space.
115 /// @param Schedule { DomainWrite[] -> Scatter[] }
116 /// @param Writes { DomainWrite[] }
117 /// @param InclPrevWrite Include the previous write to result.
118 /// @param InclOverwrite Include the overwrite to the result.
120 /// @return { Scatter[] -> DomainWrite[] }
121 isl::map computeScalarReachingOverwrite(isl::union_map Schedule,
122 isl::set Writes, bool InclPrevWrite,
123 bool InclOverwrite) {
124 isl::space ScatterSpace = getScatterSpace(Schedule);
125 isl::space DomSpace = Writes.get_space();
127 isl::union_map ReachOverwrite = computeScalarReachingOverwrite(
128 Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite);
130 isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace);
131 return singleton(std::move(ReachOverwrite), ResultSpace);
134 /// Try to find a 'natural' extension of a mapped to elements outside its
135 /// domain.
137 /// @param Relevant The map with mapping that may not be modified.
138 /// @param Universe The domain to which @p Relevant needs to be extended.
140 /// @return A map with that associates the domain elements of @p Relevant to the
141 /// same elements and in addition the elements of @p Universe to some
142 /// undefined elements. The function prefers to return simple maps.
143 isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) {
144 Relevant = Relevant.coalesce();
145 isl::union_set RelevantDomain = Relevant.domain();
146 isl::union_map Simplified = Relevant.gist_domain(RelevantDomain);
147 Simplified = Simplified.coalesce();
148 return Simplified.intersect_domain(Universe);
151 /// Represent the knowledge of the contents of any array elements in any zone or
152 /// the knowledge we would add when mapping a scalar to an array element.
154 /// Every array element at every zone unit has one of two states:
156 /// - Unused: Not occupied by any value so a transformation can change it to
157 /// other values.
159 /// - Occupied: The element contains a value that is still needed.
161 /// The union of Unused and Unknown zones forms the universe, the set of all
162 /// elements at every timepoint. The universe can easily be derived from the
163 /// array elements that are accessed someway. Arrays that are never accessed
164 /// also never play a role in any computation and can hence be ignored. With a
165 /// given universe, only one of the sets needs to stored implicitly. Computing
166 /// the complement is also an expensive operation, hence this class has been
167 /// designed that only one of sets is needed while the other is assumed to be
168 /// implicit. It can still be given, but is mostly ignored.
170 /// There are two use cases for the Knowledge class:
172 /// 1) To represent the knowledge of the current state of ScopInfo. The unused
173 /// state means that an element is currently unused: there is no read of it
174 /// before the next overwrite. Also called 'Existing'.
176 /// 2) To represent the requirements for mapping a scalar to array elements. The
177 /// unused state means that there is no change/requirement. Also called
178 /// 'Proposed'.
180 /// In addition to these states at unit zones, Knowledge needs to know when
181 /// values are written. This is because written values may have no lifetime (one
182 /// reason is that the value is never read). Such writes would therefore never
183 /// conflict, but overwrite values that might still be required. Another source
184 /// of problems are multiple writes to the same element at the same timepoint,
185 /// because their order is undefined.
186 class Knowledge final {
187 private:
188 /// { [Element[] -> Zone[]] }
189 /// Set of array elements and when they are alive.
190 /// Can contain a nullptr; in this case the set is implicitly defined as the
191 /// complement of #Unused.
193 /// The set of alive array elements is represented as zone, as the set of live
194 /// values can differ depending on how the elements are interpreted.
195 /// Assuming a value X is written at timestep [0] and read at timestep [1]
196 /// without being used at any later point, then the value is alive in the
197 /// interval ]0,1[. This interval cannot be represented by an integer set, as
198 /// it does not contain any integer point. Zones allow us to represent this
199 /// interval and can be converted to sets of timepoints when needed (e.g., in
200 /// isConflicting when comparing to the write sets).
201 /// @see convertZoneToTimepoints and this file's comment for more details.
202 isl::union_set Occupied;
204 /// { [Element[] -> Zone[]] }
205 /// Set of array elements when they are not alive, i.e. their memory can be
206 /// used for other purposed. Can contain a nullptr; in this case the set is
207 /// implicitly defined as the complement of #Occupied.
208 isl::union_set Unused;
210 /// { [Element[] -> Zone[]] -> ValInst[] }
211 /// Maps to the known content for each array element at any interval.
213 /// Any element/interval can map to multiple known elements. This is due to
214 /// multiple llvm::Value referring to the same content. Examples are
216 /// - A value stored and loaded again. The LoadInst represents the same value
217 /// as the StoreInst's value operand.
219 /// - A PHINode is equal to any one of the incoming values. In case of
220 /// LCSSA-form, it is always equal to its single incoming value.
222 /// Two Knowledges are considered not conflicting if at least one of the known
223 /// values match. Not known values are not stored as an unnamed tuple (as
224 /// #Written does), but maps to nothing.
226 /// Known values are usually just defined for #Occupied elements. Knowing
227 /// #Unused contents has no advantage as it can be overwritten.
228 isl::union_map Known;
230 /// { [Element[] -> Scatter[]] -> ValInst[] }
231 /// The write actions currently in the scop or that would be added when
232 /// mapping a scalar. Maps to the value that is written.
234 /// Written values that cannot be identified are represented by an unknown
235 /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself.
236 isl::union_map Written;
238 /// Check whether this Knowledge object is well-formed.
239 void checkConsistency() const {
240 #ifndef NDEBUG
241 // Default-initialized object
242 if (Occupied.is_null() && Unused.is_null() && Known.is_null() &&
243 Written.is_null())
244 return;
246 assert(!Occupied.is_null() || !Unused.is_null());
247 assert(!Known.is_null());
248 assert(!Written.is_null());
250 // If not all fields are defined, we cannot derived the universe.
251 if (Occupied.is_null() || Unused.is_null())
252 return;
254 assert(Occupied.is_disjoint(Unused));
255 auto Universe = Occupied.unite(Unused);
257 assert(!Known.domain().is_subset(Universe).is_false());
258 assert(!Written.domain().is_subset(Universe).is_false());
259 #endif
262 public:
263 /// Initialize a nullptr-Knowledge. This is only provided for convenience; do
264 /// not use such an object.
265 Knowledge() {}
267 /// Create a new object with the given members.
268 Knowledge(isl::union_set Occupied, isl::union_set Unused,
269 isl::union_map Known, isl::union_map Written)
270 : Occupied(std::move(Occupied)), Unused(std::move(Unused)),
271 Known(std::move(Known)), Written(std::move(Written)) {
272 checkConsistency();
275 /// Return whether this object was not default-constructed.
276 bool isUsable() const {
277 return (Occupied.is_null() || Unused.is_null()) && !Known.is_null() &&
278 !Written.is_null();
281 /// Print the content of this object to @p OS.
282 void print(llvm::raw_ostream &OS, unsigned Indent = 0) const {
283 if (isUsable()) {
284 if (!Occupied.is_null())
285 OS.indent(Indent) << "Occupied: " << Occupied << "\n";
286 else
287 OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n";
288 if (!Unused.is_null())
289 OS.indent(Indent) << "Unused: " << Unused << "\n";
290 else
291 OS.indent(Indent) << "Unused: <Everything else not in Occupied>\n";
292 OS.indent(Indent) << "Known: " << Known << "\n";
293 OS.indent(Indent) << "Written : " << Written << '\n';
294 } else {
295 OS.indent(Indent) << "Invalid knowledge\n";
299 /// Combine two knowledges, this and @p That.
300 void learnFrom(Knowledge That) {
301 assert(!isConflicting(*this, That));
302 assert(!Unused.is_null() && !That.Occupied.is_null());
303 assert(
304 That.Unused.is_null() &&
305 "This function is only prepared to learn occupied elements from That");
306 assert(Occupied.is_null() && "This function does not implement "
307 "`this->Occupied = "
308 "this->Occupied.unite(That.Occupied);`");
310 Unused = Unused.subtract(That.Occupied);
311 Known = Known.unite(That.Known);
312 Written = Written.unite(That.Written);
314 checkConsistency();
317 /// Determine whether two Knowledges conflict with each other.
319 /// In theory @p Existing and @p Proposed are symmetric, but the
320 /// implementation is constrained by the implicit interpretation. That is, @p
321 /// Existing must have #Unused defined (use case 1) and @p Proposed must have
322 /// #Occupied defined (use case 1).
324 /// A conflict is defined as non-preserved semantics when they are merged. For
325 /// instance, when for the same array and zone they assume different
326 /// llvm::Values.
328 /// @param Existing One of the knowledges with #Unused defined.
329 /// @param Proposed One of the knowledges with #Occupied defined.
330 /// @param OS Dump the conflict reason to this output stream; use
331 /// nullptr to not output anything.
332 /// @param Indent Indention for the conflict reason.
334 /// @return True, iff the two knowledges are conflicting.
335 static bool isConflicting(const Knowledge &Existing,
336 const Knowledge &Proposed,
337 llvm::raw_ostream *OS = nullptr,
338 unsigned Indent = 0) {
339 assert(!Existing.Unused.is_null());
340 assert(!Proposed.Occupied.is_null());
342 #ifndef NDEBUG
343 if (!Existing.Occupied.is_null() && !Proposed.Unused.is_null()) {
344 auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused);
345 auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused);
346 assert(ExistingUniverse.is_equal(ProposedUniverse) &&
347 "Both inputs' Knowledges must be over the same universe");
349 #endif
351 // Do the Existing and Proposed lifetimes conflict?
353 // Lifetimes are described as the cross-product of array elements and zone
354 // intervals in which they are alive (the space { [Element[] -> Zone[]] }).
355 // In the following we call this "element/lifetime interval".
357 // In order to not conflict, one of the following conditions must apply for
358 // each element/lifetime interval:
360 // 1. If occupied in one of the knowledges, it is unused in the other.
362 // - or -
364 // 2. Both contain the same value.
366 // Instead of partitioning the element/lifetime intervals into a part that
367 // both Knowledges occupy (which requires an expensive subtraction) and for
368 // these to check whether they are known to be the same value, we check only
369 // the second condition and ensure that it also applies when then first
370 // condition is true. This is done by adding a wildcard value to
371 // Proposed.Known and Existing.Unused such that they match as a common known
372 // value. We use the "unknown ValInst" for this purpose. Every
373 // Existing.Unused may match with an unknown Proposed.Occupied because these
374 // never are in conflict with each other.
375 auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied);
376 auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal);
378 auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused);
379 auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal);
381 auto MatchingVals = ExistingValues.intersect(ProposedValues);
382 auto Matches = MatchingVals.domain();
384 // Any Proposed.Occupied must either have a match between the known values
385 // of Existing and Occupied, or be in Existing.Unused. In the latter case,
386 // the previously added "AnyVal" will match each other.
387 if (!Proposed.Occupied.is_subset(Matches)) {
388 if (OS) {
389 auto Conflicting = Proposed.Occupied.subtract(Matches);
390 auto ExistingConflictingKnown =
391 Existing.Known.intersect_domain(Conflicting);
392 auto ProposedConflictingKnown =
393 Proposed.Known.intersect_domain(Conflicting);
395 OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n";
396 OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n";
397 if (!ExistingConflictingKnown.is_empty())
398 OS->indent(Indent)
399 << "Existing Known: " << ExistingConflictingKnown << "\n";
400 if (!ProposedConflictingKnown.is_empty())
401 OS->indent(Indent)
402 << "Proposed Known: " << ProposedConflictingKnown << "\n";
404 return true;
407 // Do the writes in Existing conflict with occupied values in Proposed?
409 // In order to not conflict, it must either write to unused lifetime or
410 // write the same value. To check, we remove the writes that write into
411 // Proposed.Unused (they never conflict) and then see whether the written
412 // value is already in Proposed.Known. If there are multiple known values
413 // and a written value is known under different names, it is enough when one
414 // of the written values (assuming that they are the same value under
415 // different names, e.g. a PHINode and one of the incoming values) matches
416 // one of the known names.
418 // We convert here the set of lifetimes to actual timepoints. A lifetime is
419 // in conflict with a set of write timepoints, if either a live timepoint is
420 // clearly within the lifetime or if a write happens at the beginning of the
421 // lifetime (where it would conflict with the value that actually writes the
422 // value alive). There is no conflict at the end of a lifetime, as the alive
423 // value will always be read, before it is overwritten again. The last
424 // property holds in Polly for all scalar values and we expect all users of
425 // Knowledge to check this property also for accesses to MemoryKind::Array.
426 auto ProposedFixedDefs =
427 convertZoneToTimepoints(Proposed.Occupied, true, false);
428 auto ProposedFixedKnown =
429 convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false);
431 auto ExistingConflictingWrites =
432 Existing.Written.intersect_domain(ProposedFixedDefs);
433 auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain();
435 auto CommonWrittenVal =
436 ProposedFixedKnown.intersect(ExistingConflictingWrites);
437 auto CommonWrittenValDomain = CommonWrittenVal.domain();
439 if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) {
440 if (OS) {
441 auto ExistingConflictingWritten =
442 ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain);
443 auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain(
444 ExistingConflictingWritten.domain());
446 OS->indent(Indent)
447 << "Proposed a lifetime where there is an Existing write into it\n";
448 OS->indent(Indent) << "Existing conflicting writes: "
449 << ExistingConflictingWritten << "\n";
450 if (!ProposedConflictingKnown.is_empty())
451 OS->indent(Indent)
452 << "Proposed conflicting known: " << ProposedConflictingKnown
453 << "\n";
455 return true;
458 // Do the writes in Proposed conflict with occupied values in Existing?
459 auto ExistingAvailableDefs =
460 convertZoneToTimepoints(Existing.Unused, true, false);
461 auto ExistingKnownDefs =
462 convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false);
464 auto ProposedWrittenDomain = Proposed.Written.domain();
465 auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written);
466 auto IdenticalOrUnused =
467 ExistingAvailableDefs.unite(KnownIdentical.domain());
468 if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) {
469 if (OS) {
470 auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused);
471 auto ExistingConflictingKnown =
472 ExistingKnownDefs.intersect_domain(Conflicting);
473 auto ProposedConflictingWritten =
474 Proposed.Written.intersect_domain(Conflicting);
476 OS->indent(Indent) << "Proposed writes into range used by Existing\n";
477 OS->indent(Indent) << "Proposed conflicting writes: "
478 << ProposedConflictingWritten << "\n";
479 if (!ExistingConflictingKnown.is_empty())
480 OS->indent(Indent)
481 << "Existing conflicting known: " << ExistingConflictingKnown
482 << "\n";
484 return true;
487 // Does Proposed write at the same time as Existing already does (order of
488 // writes is undefined)? Writing the same value is permitted.
489 auto ExistingWrittenDomain = Existing.Written.domain();
490 auto BothWritten =
491 Existing.Written.domain().intersect(Proposed.Written.domain());
492 auto ExistingKnownWritten = filterKnownValInst(Existing.Written);
493 auto ProposedKnownWritten = filterKnownValInst(Proposed.Written);
494 auto CommonWritten =
495 ExistingKnownWritten.intersect(ProposedKnownWritten).domain();
497 if (!BothWritten.is_subset(CommonWritten)) {
498 if (OS) {
499 auto Conflicting = BothWritten.subtract(CommonWritten);
500 auto ExistingConflictingWritten =
501 Existing.Written.intersect_domain(Conflicting);
502 auto ProposedConflictingWritten =
503 Proposed.Written.intersect_domain(Conflicting);
505 OS->indent(Indent) << "Proposed writes at the same time as an already "
506 "Existing write\n";
507 OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n";
508 if (!ExistingConflictingWritten.is_empty())
509 OS->indent(Indent)
510 << "Exiting write: " << ExistingConflictingWritten << "\n";
511 if (!ProposedConflictingWritten.is_empty())
512 OS->indent(Indent)
513 << "Proposed write: " << ProposedConflictingWritten << "\n";
515 return true;
518 return false;
522 /// Implementation of the DeLICM/DePRE transformation.
523 class DeLICMImpl final : public ZoneAlgorithm {
524 private:
525 /// Knowledge before any transformation took place.
526 Knowledge OriginalZone;
528 /// Current knowledge of the SCoP including all already applied
529 /// transformations.
530 Knowledge Zone;
532 /// Number of StoreInsts something can be mapped to.
533 int NumberOfCompatibleTargets = 0;
535 /// The number of StoreInsts to which at least one value or PHI has been
536 /// mapped to.
537 int NumberOfTargetsMapped = 0;
539 /// The number of llvm::Value mapped to some array element.
540 int NumberOfMappedValueScalars = 0;
542 /// The number of PHIs mapped to some array element.
543 int NumberOfMappedPHIScalars = 0;
545 /// Determine whether two knowledges are conflicting with each other.
547 /// @see Knowledge::isConflicting
548 bool isConflicting(const Knowledge &Proposed) {
549 raw_ostream *OS = nullptr;
550 LLVM_DEBUG(OS = &llvm::dbgs());
551 return Knowledge::isConflicting(Zone, Proposed, OS, 4);
554 /// Determine whether @p SAI is a scalar that can be mapped to an array
555 /// element.
556 bool isMappable(const ScopArrayInfo *SAI) {
557 assert(SAI);
559 if (SAI->isValueKind()) {
560 auto *MA = S->getValueDef(SAI);
561 if (!MA) {
562 LLVM_DEBUG(
563 dbgs()
564 << " Reject because value is read-only within the scop\n");
565 return false;
568 // Mapping if value is used after scop is not supported. The code
569 // generator would need to reload the scalar after the scop, but it
570 // does not have the information to where it is mapped to. Only the
571 // MemoryAccesses have that information, not the ScopArrayInfo.
572 auto Inst = MA->getAccessInstruction();
573 for (auto User : Inst->users()) {
574 if (!isa<Instruction>(User))
575 return false;
576 auto UserInst = cast<Instruction>(User);
578 if (!S->contains(UserInst)) {
579 LLVM_DEBUG(dbgs() << " Reject because value is escaping\n");
580 return false;
584 return true;
587 if (SAI->isPHIKind()) {
588 auto *MA = S->getPHIRead(SAI);
589 assert(MA);
591 // Mapping of an incoming block from before the SCoP is not supported by
592 // the code generator.
593 auto PHI = cast<PHINode>(MA->getAccessInstruction());
594 for (auto Incoming : PHI->blocks()) {
595 if (!S->contains(Incoming)) {
596 LLVM_DEBUG(dbgs()
597 << " Reject because at least one incoming block is "
598 "not in the scop region\n");
599 return false;
603 return true;
606 LLVM_DEBUG(dbgs() << " Reject ExitPHI or other non-value\n");
607 return false;
610 /// Compute the uses of a MemoryKind::Value and its lifetime (from its
611 /// definition to the last use).
613 /// @param SAI The ScopArrayInfo representing the value's storage.
615 /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] }
616 /// First element is the set of uses for each definition.
617 /// The second is the lifetime of each definition.
618 std::tuple<isl::union_map, isl::map>
619 computeValueUses(const ScopArrayInfo *SAI) {
620 assert(SAI->isValueKind());
622 // { DomainRead[] }
623 auto Reads = makeEmptyUnionSet();
625 // Find all uses.
626 for (auto *MA : S->getValueUses(SAI))
627 Reads = Reads.unite(getDomainFor(MA));
629 // { DomainRead[] -> Scatter[] }
630 auto ReadSchedule = getScatterFor(Reads);
632 auto *DefMA = S->getValueDef(SAI);
633 assert(DefMA);
635 // { DomainDef[] }
636 auto Writes = getDomainFor(DefMA);
638 // { DomainDef[] -> Scatter[] }
639 auto WriteScatter = getScatterFor(Writes);
641 // { Scatter[] -> DomainDef[] }
642 auto ReachDef = getScalarReachingDefinition(DefMA->getStatement());
644 // { [DomainDef[] -> Scatter[]] -> DomainUse[] }
645 auto Uses = isl::union_map(ReachDef.reverse().range_map())
646 .apply_range(ReadSchedule.reverse());
648 // { DomainDef[] -> Scatter[] }
649 auto UseScatter =
650 singleton(Uses.domain().unwrap(),
651 Writes.get_space().map_from_domain_and_range(ScatterSpace));
653 // { DomainDef[] -> Zone[] }
654 auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true);
656 // { DomainDef[] -> DomainRead[] }
657 auto DefUses = Uses.domain_factor_domain();
659 return std::make_pair(DefUses, Lifetime);
662 /// Try to map a MemoryKind::Value to a given array element.
664 /// @param SAI Representation of the scalar's memory to map.
665 /// @param TargetElt { Scatter[] -> Element[] }
666 /// Suggestion where to map a scalar to when at a timepoint.
668 /// @return true if the scalar was successfully mapped.
669 bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) {
670 assert(SAI->isValueKind());
672 auto *DefMA = S->getValueDef(SAI);
673 assert(DefMA->isValueKind());
674 assert(DefMA->isMustWrite());
675 auto *V = DefMA->getAccessValue();
676 auto *DefInst = DefMA->getAccessInstruction();
678 // Stop if the scalar has already been mapped.
679 if (!DefMA->getLatestScopArrayInfo()->isValueKind())
680 return false;
682 // { DomainDef[] -> Scatter[] }
683 auto DefSched = getScatterFor(DefMA);
685 // Where each write is mapped to, according to the suggestion.
686 // { DomainDef[] -> Element[] }
687 auto DefTarget = TargetElt.apply_domain(DefSched.reverse());
688 simplify(DefTarget);
689 LLVM_DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n');
691 auto OrigDomain = getDomainFor(DefMA);
692 auto MappedDomain = DefTarget.domain();
693 if (!OrigDomain.is_subset(MappedDomain)) {
694 LLVM_DEBUG(
695 dbgs()
696 << " Reject because mapping does not encompass all instances\n");
697 return false;
700 // { DomainDef[] -> Zone[] }
701 isl::map Lifetime;
703 // { DomainDef[] -> DomainUse[] }
704 isl::union_map DefUses;
706 std::tie(DefUses, Lifetime) = computeValueUses(SAI);
707 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n');
709 /// { [Element[] -> Zone[]] }
710 auto EltZone = Lifetime.apply_domain(DefTarget).wrap();
711 simplify(EltZone);
713 // When known knowledge is disabled, just return the unknown value. It will
714 // either get filtered out or conflict with itself.
715 // { DomainDef[] -> ValInst[] }
716 isl::map ValInst;
717 if (DelicmComputeKnown)
718 ValInst = makeValInst(V, DefMA->getStatement(),
719 LI->getLoopFor(DefInst->getParent()));
720 else
721 ValInst = makeUnknownForDomain(DefMA->getStatement());
723 // { DomainDef[] -> [Element[] -> Zone[]] }
724 auto EltKnownTranslator = DefTarget.range_product(Lifetime);
726 // { [Element[] -> Zone[]] -> ValInst[] }
727 auto EltKnown = ValInst.apply_domain(EltKnownTranslator);
728 simplify(EltKnown);
730 // { DomainDef[] -> [Element[] -> Scatter[]] }
731 auto WrittenTranslator = DefTarget.range_product(DefSched);
733 // { [Element[] -> Scatter[]] -> ValInst[] }
734 auto DefEltSched = ValInst.apply_domain(WrittenTranslator);
735 simplify(DefEltSched);
737 Knowledge Proposed(EltZone, {}, filterKnownValInst(EltKnown), DefEltSched);
738 if (isConflicting(Proposed))
739 return false;
741 // { DomainUse[] -> Element[] }
742 auto UseTarget = DefUses.reverse().apply_range(DefTarget);
744 mapValue(SAI, std::move(DefTarget), std::move(UseTarget),
745 std::move(Lifetime), std::move(Proposed));
746 return true;
749 /// After a scalar has been mapped, update the global knowledge.
750 void applyLifetime(Knowledge Proposed) {
751 Zone.learnFrom(std::move(Proposed));
754 /// Map a MemoryKind::Value scalar to an array element.
756 /// Callers must have ensured that the mapping is valid and not conflicting.
758 /// @param SAI The ScopArrayInfo representing the scalar's memory to
759 /// map.
760 /// @param DefTarget { DomainDef[] -> Element[] }
761 /// The array element to map the scalar to.
762 /// @param UseTarget { DomainUse[] -> Element[] }
763 /// The array elements the uses are mapped to.
764 /// @param Lifetime { DomainDef[] -> Zone[] }
765 /// The lifetime of each llvm::Value definition for
766 /// reporting.
767 /// @param Proposed Mapping constraints for reporting.
768 void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget,
769 isl::union_map UseTarget, isl::map Lifetime,
770 Knowledge Proposed) {
771 // Redirect the read accesses.
772 for (auto *MA : S->getValueUses(SAI)) {
773 // { DomainUse[] }
774 auto Domain = getDomainFor(MA);
776 // { DomainUse[] -> Element[] }
777 auto NewAccRel = UseTarget.intersect_domain(Domain);
778 simplify(NewAccRel);
780 assert(isl_union_map_n_map(NewAccRel.get()) == 1);
781 MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel));
784 auto *WA = S->getValueDef(SAI);
785 WA->setNewAccessRelation(DefTarget);
786 applyLifetime(Proposed);
788 MappedValueScalars++;
789 NumberOfMappedValueScalars += 1;
792 isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope,
793 bool IsCertain = true) {
794 // When known knowledge is disabled, just return the unknown value. It will
795 // either get filtered out or conflict with itself.
796 if (!DelicmComputeKnown)
797 return makeUnknownForDomain(UserStmt);
798 return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain);
801 /// Express the incoming values of a PHI for each incoming statement in an
802 /// isl::union_map.
804 /// @param SAI The PHI scalar represented by a ScopArrayInfo.
806 /// @return { PHIWriteDomain[] -> ValInst[] }
807 isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) {
808 auto Result = makeEmptyUnionMap();
810 // Collect the incoming values.
811 for (auto *MA : S->getPHIIncomings(SAI)) {
812 // { DomainWrite[] -> ValInst[] }
813 isl::union_map ValInst;
814 auto *WriteStmt = MA->getStatement();
816 auto Incoming = MA->getIncoming();
817 assert(!Incoming.empty());
818 if (Incoming.size() == 1) {
819 ValInst = makeValInst(Incoming[0].second, WriteStmt,
820 LI->getLoopFor(Incoming[0].first));
821 } else {
822 // If the PHI is in a subregion's exit node it can have multiple
823 // incoming values (+ maybe another incoming edge from an unrelated
824 // block). We cannot directly represent it as a single llvm::Value.
825 // We currently model it as unknown value, but modeling as the PHIInst
826 // itself could be OK, too.
827 ValInst = makeUnknownForDomain(WriteStmt);
830 Result = Result.unite(ValInst);
833 assert(Result.is_single_valued() &&
834 "Cannot have multiple incoming values for same incoming statement");
835 return Result;
838 /// Try to map a MemoryKind::PHI scalar to a given array element.
840 /// @param SAI Representation of the scalar's memory to map.
841 /// @param TargetElt { Scatter[] -> Element[] }
842 /// Suggestion where to map the scalar to when at a
843 /// timepoint.
845 /// @return true if the PHI scalar has been mapped.
846 bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) {
847 auto *PHIRead = S->getPHIRead(SAI);
848 assert(PHIRead->isPHIKind());
849 assert(PHIRead->isRead());
851 // Skip if already been mapped.
852 if (!PHIRead->getLatestScopArrayInfo()->isPHIKind())
853 return false;
855 // { DomainRead[] -> Scatter[] }
856 auto PHISched = getScatterFor(PHIRead);
858 // { DomainRead[] -> Element[] }
859 auto PHITarget = PHISched.apply_range(TargetElt);
860 simplify(PHITarget);
861 LLVM_DEBUG(dbgs() << " Mapping: " << PHITarget << '\n');
863 auto OrigDomain = getDomainFor(PHIRead);
864 auto MappedDomain = PHITarget.domain();
865 if (!OrigDomain.is_subset(MappedDomain)) {
866 LLVM_DEBUG(
867 dbgs()
868 << " Reject because mapping does not encompass all instances\n");
869 return false;
872 // { DomainRead[] -> DomainWrite[] }
873 auto PerPHIWrites = computePerPHI(SAI);
874 if (PerPHIWrites.is_null()) {
875 LLVM_DEBUG(
876 dbgs() << " Reject because cannot determine incoming values\n");
877 return false;
880 // { DomainWrite[] -> Element[] }
881 auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse();
882 simplify(WritesTarget);
884 // { DomainWrite[] }
885 auto UniverseWritesDom = isl::union_set::empty(ParamSpace.ctx());
887 for (auto *MA : S->getPHIIncomings(SAI))
888 UniverseWritesDom = UniverseWritesDom.unite(getDomainFor(MA));
890 auto RelevantWritesTarget = WritesTarget;
891 if (DelicmOverapproximateWrites)
892 WritesTarget = expandMapping(WritesTarget, UniverseWritesDom);
894 auto ExpandedWritesDom = WritesTarget.domain();
895 if (!DelicmPartialWrites &&
896 !UniverseWritesDom.is_subset(ExpandedWritesDom)) {
897 LLVM_DEBUG(
898 dbgs() << " Reject because did not find PHI write mapping for "
899 "all instances\n");
900 if (DelicmOverapproximateWrites)
901 LLVM_DEBUG(dbgs() << " Relevant Mapping: "
902 << RelevantWritesTarget << '\n');
903 LLVM_DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget
904 << '\n');
905 LLVM_DEBUG(dbgs() << " Missing instances: "
906 << UniverseWritesDom.subtract(ExpandedWritesDom)
907 << '\n');
908 return false;
911 // { DomainRead[] -> Scatter[] }
912 isl::union_map PerPHIWriteScatterUmap = PerPHIWrites.apply_range(Schedule);
913 isl::map PerPHIWriteScatter =
914 singleton(PerPHIWriteScatterUmap, PHISched.get_space());
916 // { DomainRead[] -> Zone[] }
917 auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true);
918 simplify(Lifetime);
919 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n");
921 // { DomainWrite[] -> Zone[] }
922 auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites);
924 // { DomainWrite[] -> ValInst[] }
925 auto WrittenValue = determinePHIWrittenValues(SAI);
927 // { DomainWrite[] -> [Element[] -> Scatter[]] }
928 auto WrittenTranslator = WritesTarget.range_product(Schedule);
930 // { [Element[] -> Scatter[]] -> ValInst[] }
931 auto Written = WrittenValue.apply_domain(WrittenTranslator);
932 simplify(Written);
934 // { DomainWrite[] -> [Element[] -> Zone[]] }
935 auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime);
937 // { DomainWrite[] -> ValInst[] }
938 auto WrittenKnownValue = filterKnownValInst(WrittenValue);
940 // { [Element[] -> Zone[]] -> ValInst[] }
941 auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator);
942 simplify(EltLifetimeInst);
944 // { [Element[] -> Zone[] }
945 auto Occupied = LifetimeTranslator.range();
946 simplify(Occupied);
948 Knowledge Proposed(Occupied, {}, EltLifetimeInst, Written);
949 if (isConflicting(Proposed))
950 return false;
952 mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget),
953 std::move(Lifetime), std::move(Proposed));
954 return true;
957 /// Map a MemoryKind::PHI scalar to an array element.
959 /// Callers must have ensured that the mapping is valid and not conflicting
960 /// with the common knowledge.
962 /// @param SAI The ScopArrayInfo representing the scalar's memory to
963 /// map.
964 /// @param ReadTarget { DomainRead[] -> Element[] }
965 /// The array element to map the scalar to.
966 /// @param WriteTarget { DomainWrite[] -> Element[] }
967 /// New access target for each PHI incoming write.
968 /// @param Lifetime { DomainRead[] -> Zone[] }
969 /// The lifetime of each PHI for reporting.
970 /// @param Proposed Mapping constraints for reporting.
971 void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget,
972 isl::union_map WriteTarget, isl::map Lifetime,
973 Knowledge Proposed) {
974 // { Element[] }
975 isl::space ElementSpace = ReadTarget.get_space().range();
977 // Redirect the PHI incoming writes.
978 for (auto *MA : S->getPHIIncomings(SAI)) {
979 // { DomainWrite[] }
980 auto Domain = getDomainFor(MA);
982 // { DomainWrite[] -> Element[] }
983 auto NewAccRel = WriteTarget.intersect_domain(Domain);
984 simplify(NewAccRel);
986 isl::space NewAccRelSpace =
987 Domain.get_space().map_from_domain_and_range(ElementSpace);
988 isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace);
989 MA->setNewAccessRelation(NewAccRelMap);
992 // Redirect the PHI read.
993 auto *PHIRead = S->getPHIRead(SAI);
994 PHIRead->setNewAccessRelation(ReadTarget);
995 applyLifetime(Proposed);
997 MappedPHIScalars++;
998 NumberOfMappedPHIScalars++;
1001 /// Search and map scalars to memory overwritten by @p TargetStoreMA.
1003 /// Start trying to map scalars that are used in the same statement as the
1004 /// store. For every successful mapping, try to also map scalars of the
1005 /// statements where those are written. Repeat, until no more mapping
1006 /// opportunity is found.
1008 /// There is currently no preference in which order scalars are tried.
1009 /// Ideally, we would direct it towards a load instruction of the same array
1010 /// element.
1011 bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) {
1012 assert(TargetStoreMA->isLatestArrayKind());
1013 assert(TargetStoreMA->isMustWrite());
1015 auto TargetStmt = TargetStoreMA->getStatement();
1017 // { DomTarget[] }
1018 auto TargetDom = getDomainFor(TargetStmt);
1020 // { DomTarget[] -> Element[] }
1021 auto TargetAccRel = getAccessRelationFor(TargetStoreMA);
1023 // { Zone[] -> DomTarget[] }
1024 // For each point in time, find the next target store instance.
1025 auto Target =
1026 computeScalarReachingOverwrite(Schedule, TargetDom, false, true);
1028 // { Zone[] -> Element[] }
1029 // Use the target store's write location as a suggestion to map scalars to.
1030 auto EltTarget = Target.apply_range(TargetAccRel);
1031 simplify(EltTarget);
1032 LLVM_DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n');
1034 // Stack of elements not yet processed.
1035 SmallVector<MemoryAccess *, 16> Worklist;
1037 // Set of scalars already tested.
1038 SmallPtrSet<const ScopArrayInfo *, 16> Closed;
1040 // Lambda to add all scalar reads to the work list.
1041 auto ProcessAllIncoming = [&](ScopStmt *Stmt) {
1042 for (auto *MA : *Stmt) {
1043 if (!MA->isLatestScalarKind())
1044 continue;
1045 if (!MA->isRead())
1046 continue;
1048 Worklist.push_back(MA);
1052 auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0);
1053 if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal))
1054 Worklist.push_back(WrittenValInputMA);
1055 else
1056 ProcessAllIncoming(TargetStmt);
1058 auto AnyMapped = false;
1059 auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout();
1060 auto StoreSize =
1061 DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType());
1063 while (!Worklist.empty()) {
1064 auto *MA = Worklist.pop_back_val();
1066 auto *SAI = MA->getScopArrayInfo();
1067 if (Closed.count(SAI))
1068 continue;
1069 Closed.insert(SAI);
1070 LLVM_DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI
1071 << ")\n");
1073 // Skip non-mappable scalars.
1074 if (!isMappable(SAI))
1075 continue;
1077 auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType());
1078 if (MASize > StoreSize) {
1079 LLVM_DEBUG(
1080 dbgs() << " Reject because storage size is insufficient\n");
1081 continue;
1084 // Try to map MemoryKind::Value scalars.
1085 if (SAI->isValueKind()) {
1086 if (!tryMapValue(SAI, EltTarget))
1087 continue;
1089 auto *DefAcc = S->getValueDef(SAI);
1090 ProcessAllIncoming(DefAcc->getStatement());
1092 AnyMapped = true;
1093 continue;
1096 // Try to map MemoryKind::PHI scalars.
1097 if (SAI->isPHIKind()) {
1098 if (!tryMapPHI(SAI, EltTarget))
1099 continue;
1100 // Add inputs of all incoming statements to the worklist. Prefer the
1101 // input accesses of the incoming blocks.
1102 for (auto *PHIWrite : S->getPHIIncomings(SAI)) {
1103 auto *PHIWriteStmt = PHIWrite->getStatement();
1104 bool FoundAny = false;
1105 for (auto Incoming : PHIWrite->getIncoming()) {
1106 auto *IncomingInputMA =
1107 PHIWriteStmt->lookupInputAccessOf(Incoming.second);
1108 if (!IncomingInputMA)
1109 continue;
1111 Worklist.push_back(IncomingInputMA);
1112 FoundAny = true;
1115 if (!FoundAny)
1116 ProcessAllIncoming(PHIWrite->getStatement());
1119 AnyMapped = true;
1120 continue;
1124 if (AnyMapped) {
1125 TargetsMapped++;
1126 NumberOfTargetsMapped++;
1128 return AnyMapped;
1131 /// Compute when an array element is unused.
1133 /// @return { [Element[] -> Zone[]] }
1134 isl::union_set computeLifetime() const {
1135 // { Element[] -> Zone[] }
1136 auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads,
1137 false, false, true);
1139 auto Result = ArrayUnused.wrap();
1141 simplify(Result);
1142 return Result;
1145 /// Determine when an array element is written to, and which value instance is
1146 /// written.
1148 /// @return { [Element[] -> Scatter[]] -> ValInst[] }
1149 isl::union_map computeWritten() const {
1150 // { [Element[] -> Scatter[]] -> ValInst[] }
1151 auto EltWritten = applyDomainRange(AllWriteValInst, Schedule);
1153 simplify(EltWritten);
1154 return EltWritten;
1157 /// Determine whether an access touches at most one element.
1159 /// The accessed element could be a scalar or accessing an array with constant
1160 /// subscript, such that all instances access only that element.
1162 /// @param MA The access to test.
1164 /// @return True, if zero or one elements are accessed; False if at least two
1165 /// different elements are accessed.
1166 bool isScalarAccess(MemoryAccess *MA) {
1167 auto Map = getAccessRelationFor(MA);
1168 auto Set = Map.range();
1169 return Set.is_singleton();
1172 /// Print mapping statistics to @p OS.
1173 void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const {
1174 OS.indent(Indent) << "Statistics {\n";
1175 OS.indent(Indent + 4) << "Compatible overwrites: "
1176 << NumberOfCompatibleTargets << "\n";
1177 OS.indent(Indent + 4) << "Overwrites mapped to: " << NumberOfTargetsMapped
1178 << '\n';
1179 OS.indent(Indent + 4) << "Value scalars mapped: "
1180 << NumberOfMappedValueScalars << '\n';
1181 OS.indent(Indent + 4) << "PHI scalars mapped: "
1182 << NumberOfMappedPHIScalars << '\n';
1183 OS.indent(Indent) << "}\n";
1186 public:
1187 DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {}
1189 /// Calculate the lifetime (definition to last use) of every array element.
1191 /// @return True if the computed lifetimes (#Zone) is usable.
1192 bool computeZone() {
1193 // Check that nothing strange occurs.
1194 collectCompatibleElts();
1196 isl::union_set EltUnused;
1197 isl::union_map EltKnown, EltWritten;
1200 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps);
1202 computeCommon();
1204 EltUnused = computeLifetime();
1205 EltKnown = computeKnown(true, false);
1206 EltWritten = computeWritten();
1208 DeLICMAnalyzed++;
1210 if (EltUnused.is_null() || EltKnown.is_null() || EltWritten.is_null()) {
1211 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota &&
1212 "The only reason that these things have not been computed should "
1213 "be if the max-operations limit hit");
1214 DeLICMOutOfQuota++;
1215 LLVM_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n");
1216 DebugLoc Begin, End;
1217 getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End);
1218 OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin,
1219 S->getEntry());
1220 R << "maximal number of operations exceeded during zone analysis";
1221 S->getFunction().getContext().diagnose(R);
1222 return false;
1225 Zone = OriginalZone = Knowledge({}, EltUnused, EltKnown, EltWritten);
1226 LLVM_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4));
1228 assert(Zone.isUsable() && OriginalZone.isUsable());
1229 return true;
1232 /// Try to map as many scalars to unused array elements as possible.
1234 /// Multiple scalars might be mappable to intersecting unused array element
1235 /// zones, but we can only chose one. This is a greedy algorithm, therefore
1236 /// the first processed element claims it.
1237 void greedyCollapse() {
1238 bool Modified = false;
1240 for (auto &Stmt : *S) {
1241 for (auto *MA : Stmt) {
1242 if (!MA->isLatestArrayKind())
1243 continue;
1244 if (!MA->isWrite())
1245 continue;
1247 if (MA->isMayWrite()) {
1248 LLVM_DEBUG(dbgs() << "Access " << MA
1249 << " pruned because it is a MAY_WRITE\n");
1250 OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite",
1251 MA->getAccessInstruction());
1252 R << "Skipped possible mapping target because it is not an "
1253 "unconditional overwrite";
1254 S->getFunction().getContext().diagnose(R);
1255 continue;
1258 if (Stmt.getNumIterators() == 0) {
1259 LLVM_DEBUG(dbgs() << "Access " << MA
1260 << " pruned because it is not in a loop\n");
1261 OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop",
1262 MA->getAccessInstruction());
1263 R << "skipped possible mapping target because it is not in a loop";
1264 S->getFunction().getContext().diagnose(R);
1265 continue;
1268 if (isScalarAccess(MA)) {
1269 LLVM_DEBUG(dbgs()
1270 << "Access " << MA
1271 << " pruned because it writes only a single element\n");
1272 OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite",
1273 MA->getAccessInstruction());
1274 R << "skipped possible mapping target because the memory location "
1275 "written to does not depend on its outer loop";
1276 S->getFunction().getContext().diagnose(R);
1277 continue;
1280 if (!isa<StoreInst>(MA->getAccessInstruction())) {
1281 LLVM_DEBUG(dbgs() << "Access " << MA
1282 << " pruned because it is not a StoreInst\n");
1283 OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore",
1284 MA->getAccessInstruction());
1285 R << "skipped possible mapping target because non-store instructions "
1286 "are not supported";
1287 S->getFunction().getContext().diagnose(R);
1288 continue;
1291 // Check for more than one element acces per statement instance.
1292 // Currently we expect write accesses to be functional, eg. disallow
1294 // { Stmt[0] -> [i] : 0 <= i < 2 }
1296 // This may occur when some accesses to the element write/read only
1297 // parts of the element, eg. a single byte. Polly then divides each
1298 // element into subelements of the smallest access length, normal access
1299 // then touch multiple of such subelements. It is very common when the
1300 // array is accesses with memset, memcpy or memmove which take i8*
1301 // arguments.
1302 isl::union_map AccRel = MA->getLatestAccessRelation();
1303 if (!AccRel.is_single_valued().is_true()) {
1304 LLVM_DEBUG(dbgs() << "Access " << MA
1305 << " is incompatible because it writes multiple "
1306 "elements per instance\n");
1307 OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel",
1308 MA->getAccessInstruction());
1309 R << "skipped possible mapping target because it writes more than "
1310 "one element";
1311 S->getFunction().getContext().diagnose(R);
1312 continue;
1315 isl::union_set TouchedElts = AccRel.range();
1316 if (!TouchedElts.is_subset(CompatibleElts)) {
1317 LLVM_DEBUG(
1318 dbgs()
1319 << "Access " << MA
1320 << " is incompatible because it touches incompatible elements\n");
1321 OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts",
1322 MA->getAccessInstruction());
1323 R << "skipped possible mapping target because a target location "
1324 "cannot be reliably analyzed";
1325 S->getFunction().getContext().diagnose(R);
1326 continue;
1329 assert(isCompatibleAccess(MA));
1330 NumberOfCompatibleTargets++;
1331 LLVM_DEBUG(dbgs() << "Analyzing target access " << MA << "\n");
1332 if (collapseScalarsToStore(MA))
1333 Modified = true;
1337 if (Modified)
1338 DeLICMScopsModified++;
1341 /// Dump the internal information about a performed DeLICM to @p OS.
1342 void print(llvm::raw_ostream &OS, int Indent = 0) {
1343 if (!Zone.isUsable()) {
1344 OS.indent(Indent) << "Zone not computed\n";
1345 return;
1348 printStatistics(OS, Indent);
1349 if (!isModified()) {
1350 OS.indent(Indent) << "No modification has been made\n";
1351 return;
1353 printAccesses(OS, Indent);
1356 /// Return whether at least one transformation been applied.
1357 bool isModified() const { return NumberOfTargetsMapped > 0; }
1360 static std::unique_ptr<DeLICMImpl> collapseToUnused(Scop &S, LoopInfo &LI) {
1361 std::unique_ptr<DeLICMImpl> Impl = std::make_unique<DeLICMImpl>(&S, &LI);
1363 if (!Impl->computeZone()) {
1364 LLVM_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n");
1365 return Impl;
1368 LLVM_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n");
1369 Impl->greedyCollapse();
1371 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
1372 LLVM_DEBUG(dbgs() << S);
1374 return Impl;
1377 static std::unique_ptr<DeLICMImpl> runDeLICM(Scop &S, LoopInfo &LI) {
1378 std::unique_ptr<DeLICMImpl> Impl = collapseToUnused(S, LI);
1380 Scop::ScopStatistics ScopStats = S.getStatistics();
1381 NumValueWrites += ScopStats.NumValueWrites;
1382 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
1383 NumPHIWrites += ScopStats.NumPHIWrites;
1384 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
1385 NumSingletonWrites += ScopStats.NumSingletonWrites;
1386 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
1388 return Impl;
1391 static PreservedAnalyses runDeLICMUsingNPM(Scop &S, ScopAnalysisManager &SAM,
1392 ScopStandardAnalysisResults &SAR,
1393 SPMUpdater &U, raw_ostream *OS) {
1394 LoopInfo &LI = SAR.LI;
1395 std::unique_ptr<DeLICMImpl> Impl = runDeLICM(S, LI);
1397 if (OS) {
1398 *OS << "Printing analysis 'Polly - DeLICM/DePRE' for region: '"
1399 << S.getName() << "' in function '" << S.getFunction().getName()
1400 << "':\n";
1401 if (Impl) {
1402 assert(Impl->getScop() == &S);
1404 *OS << "DeLICM result:\n";
1405 Impl->print(*OS);
1409 if (!Impl->isModified())
1410 return PreservedAnalyses::all();
1412 PreservedAnalyses PA;
1413 PA.preserveSet<AllAnalysesOn<Module>>();
1414 PA.preserveSet<AllAnalysesOn<Function>>();
1415 PA.preserveSet<AllAnalysesOn<Loop>>();
1416 return PA;
1419 class DeLICMWrapperPass final : public ScopPass {
1420 private:
1421 DeLICMWrapperPass(const DeLICMWrapperPass &) = delete;
1422 const DeLICMWrapperPass &operator=(const DeLICMWrapperPass &) = delete;
1424 /// The pass implementation, also holding per-scop data.
1425 std::unique_ptr<DeLICMImpl> Impl;
1427 public:
1428 static char ID;
1429 explicit DeLICMWrapperPass() : ScopPass(ID) {}
1431 void getAnalysisUsage(AnalysisUsage &AU) const override {
1432 AU.addRequiredTransitive<ScopInfoRegionPass>();
1433 AU.addRequired<LoopInfoWrapperPass>();
1434 AU.setPreservesAll();
1437 bool runOnScop(Scop &S) override {
1438 // Free resources for previous scop's computation, if not yet done.
1439 releaseMemory();
1441 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1442 Impl = runDeLICM(S, LI);
1444 return Impl->isModified();
1447 void printScop(raw_ostream &OS, Scop &S) const override {
1448 if (!Impl)
1449 return;
1450 assert(Impl->getScop() == &S);
1452 OS << "DeLICM result:\n";
1453 Impl->print(OS);
1456 void releaseMemory() override { Impl.reset(); }
1459 char DeLICMWrapperPass::ID;
1461 /// Print result from DeLICMWrapperPass.
1462 class DeLICMPrinterLegacyPass final : public ScopPass {
1463 public:
1464 static char ID;
1466 DeLICMPrinterLegacyPass() : DeLICMPrinterLegacyPass(outs()){};
1467 explicit DeLICMPrinterLegacyPass(llvm::raw_ostream &OS)
1468 : ScopPass(ID), OS(OS) {}
1470 bool runOnScop(Scop &S) override {
1471 DeLICMWrapperPass &P = getAnalysis<DeLICMWrapperPass>();
1473 OS << "Printing analysis '" << P.getPassName() << "' for region: '"
1474 << S.getRegion().getNameStr() << "' in function '"
1475 << S.getFunction().getName() << "':\n";
1476 P.printScop(OS, S);
1478 return false;
1481 void getAnalysisUsage(AnalysisUsage &AU) const override {
1482 ScopPass::getAnalysisUsage(AU);
1483 AU.addRequired<DeLICMWrapperPass>();
1484 AU.setPreservesAll();
1487 private:
1488 llvm::raw_ostream &OS;
1491 char DeLICMPrinterLegacyPass::ID = 0;
1492 } // anonymous namespace
1494 Pass *polly::createDeLICMWrapperPass() { return new DeLICMWrapperPass(); }
1496 llvm::Pass *polly::createDeLICMPrinterLegacyPass(llvm::raw_ostream &OS) {
1497 return new DeLICMPrinterLegacyPass(OS);
1500 llvm::PreservedAnalyses polly::DeLICMPass::run(Scop &S,
1501 ScopAnalysisManager &SAM,
1502 ScopStandardAnalysisResults &SAR,
1503 SPMUpdater &U) {
1504 return runDeLICMUsingNPM(S, SAM, SAR, U, nullptr);
1507 llvm::PreservedAnalyses DeLICMPrinterPass::run(Scop &S,
1508 ScopAnalysisManager &SAM,
1509 ScopStandardAnalysisResults &SAR,
1510 SPMUpdater &U) {
1511 return runDeLICMUsingNPM(S, SAM, SAR, U, &OS);
1514 bool polly::isConflicting(
1515 isl::union_set ExistingOccupied, isl::union_set ExistingUnused,
1516 isl::union_map ExistingKnown, isl::union_map ExistingWrites,
1517 isl::union_set ProposedOccupied, isl::union_set ProposedUnused,
1518 isl::union_map ProposedKnown, isl::union_map ProposedWrites,
1519 llvm::raw_ostream *OS, unsigned Indent) {
1520 Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused),
1521 std::move(ExistingKnown), std::move(ExistingWrites));
1522 Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused),
1523 std::move(ProposedKnown), std::move(ProposedWrites));
1525 return Knowledge::isConflicting(Existing, Proposed, OS, Indent);
1528 INITIALIZE_PASS_BEGIN(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE",
1529 false, false)
1530 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass)
1531 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1532 INITIALIZE_PASS_END(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE",
1533 false, false)
1535 INITIALIZE_PASS_BEGIN(DeLICMPrinterLegacyPass, "polly-print-delicm",
1536 "Polly - Print DeLICM/DePRE", false, false)
1537 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass)
1538 INITIALIZE_PASS_END(DeLICMPrinterLegacyPass, "polly-print-delicm",
1539 "Polly - Print DeLICM/DePRE", false, false)