[lldb] Add ability to hide the root name of a value
[llvm-project.git] / flang / lib / Optimizer / Transforms / ArrayValueCopy.cpp
blobf07debfc19ebab453e2d43ed5cefca6bddd82818
1 //===-- ArrayValueCopy.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "flang/Optimizer/Builder/Array.h"
10 #include "flang/Optimizer/Builder/BoxValue.h"
11 #include "flang/Optimizer/Builder/FIRBuilder.h"
12 #include "flang/Optimizer/Builder/Factory.h"
13 #include "flang/Optimizer/Builder/Runtime/Derived.h"
14 #include "flang/Optimizer/Builder/Todo.h"
15 #include "flang/Optimizer/Dialect/FIRDialect.h"
16 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
17 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
18 #include "flang/Optimizer/Transforms/Passes.h"
19 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
20 #include "mlir/Dialect/SCF/IR/SCF.h"
21 #include "mlir/Transforms/DialectConversion.h"
22 #include "llvm/Support/Debug.h"
24 namespace fir {
25 #define GEN_PASS_DEF_ARRAYVALUECOPY
26 #include "flang/Optimizer/Transforms/Passes.h.inc"
27 } // namespace fir
29 #define DEBUG_TYPE "flang-array-value-copy"
31 using namespace fir;
32 using namespace mlir;
34 using OperationUseMapT = llvm::DenseMap<mlir::Operation *, mlir::Operation *>;
36 namespace {
38 /// Array copy analysis.
39 /// Perform an interference analysis between array values.
40 ///
41 /// Lowering will generate a sequence of the following form.
42 /// ```mlir
43 /// %a_1 = fir.array_load %array_1(%shape) : ...
44 /// ...
45 /// %a_j = fir.array_load %array_j(%shape) : ...
46 /// ...
47 /// %a_n = fir.array_load %array_n(%shape) : ...
48 /// ...
49 /// %v_i = fir.array_fetch %a_i, ...
50 /// %a_j1 = fir.array_update %a_j, ...
51 /// ...
52 /// fir.array_merge_store %a_j, %a_jn to %array_j : ...
53 /// ```
54 ///
55 /// The analysis is to determine if there are any conflicts. A conflict is when
56 /// one of the following cases occurs.
57 ///
58 /// 1. There is an `array_update` to an array value, a_j, such that a_j was
59 /// loaded from the same array memory reference (array_j) but with a different
60 /// shape as the other array values a_i, where i != j. [Possible overlapping
61 /// arrays.]
62 ///
63 /// 2. There is either an array_fetch or array_update of a_j with a different
64 /// set of index values. [Possible loop-carried dependence.]
65 ///
66 /// If none of the array values overlap in storage and the accesses are not
67 /// loop-carried, then the arrays are conflict-free and no copies are required.
68 class ArrayCopyAnalysisBase {
69 public:
70 using ConflictSetT = llvm::SmallPtrSet<mlir::Operation *, 16>;
71 using UseSetT = llvm::SmallPtrSet<mlir::OpOperand *, 8>;
72 using LoadMapSetsT = llvm::DenseMap<mlir::Operation *, UseSetT>;
73 using AmendAccessSetT = llvm::SmallPtrSet<mlir::Operation *, 4>;
// The analysis runs eagerly: the constructor walks `op` via construct() and
// populates the conflict/use/load sets queried by the accessors below.
75 ArrayCopyAnalysisBase(mlir::Operation *op, bool optimized)
76 : operation{op}, optimizeConflicts(optimized) {
77 construct(op);
79 virtual ~ArrayCopyAnalysisBase() = default;
81 mlir::Operation *getOperation() const { return operation; }
83 /// Return true iff the `array_merge_store` has potential conflicts.
84 bool hasPotentialConflict(mlir::Operation *op) const {
85 LLVM_DEBUG(llvm::dbgs()
86 << "looking for a conflict on " << *op
87 << " and the set has a total of " << conflicts.size() << '\n');
88 return conflicts.contains(op);
91 /// Return the use map.
92 /// The use map maps array access, amend, fetch and update operations back to
93 /// the array load that is the original source of the array value.
94 /// It maps an array_load to an array_merge_store, if and only if the loaded
95 /// array value has pending modifications to be merged.
96 const OperationUseMapT &getUseMap() const { return useMap; }
98 /// Return the set of array_access ops directly associated with array_amend
99 /// ops.
100 bool inAmendAccessSet(mlir::Operation *op) const {
101 return amendAccesses.count(op);
104 /// For ArrayLoad `load`, return the transitive set of all OpOperands.
// Asserts (in debug builds) that the load was seen during construct().
105 UseSetT getLoadUseSet(mlir::Operation *load) const {
106 assert(loadMapSets.count(load) && "analysis missed an array load?");
107 return loadMapSets.lookup(load);
110 void arrayMentions(llvm::SmallVectorImpl<mlir::Operation *> &mentions,
111 ArrayLoadOp load);
113 private:
// Walks `topLevelOp` and fills the member sets; defined later in this file.
114 void construct(mlir::Operation *topLevelOp);
116 mlir::Operation *operation; // operation that analysis ran upon
117 ConflictSetT conflicts; // set of conflicts (loads and merge stores)
// See getUseMap() above for the mapping semantics of this table.
118 OperationUseMapT useMap;
119 LoadMapSetsT loadMapSets;
120 // Set of array_access ops associated with array_amend ops.
121 AmendAccessSetT amendAccesses;
// When true, Fortran variable attributes are used to rule out conflicts.
122 bool optimizeConflicts;
125 // Optimized array copy analysis that takes into account Fortran
126 // variable attributes to prove that no conflict is possible
127 // and reduce the number of temporary arrays.
128 class ArrayCopyAnalysisOptimized : public ArrayCopyAnalysisBase {
129 public:
130 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysisOptimized)
// Forwards to the base with optimized=true, enabling attribute-based pruning.
132 ArrayCopyAnalysisOptimized(mlir::Operation *op)
133 : ArrayCopyAnalysisBase(op, /*optimized=*/true) {}
136 // Unoptimized array copy analysis used at O0.
137 class ArrayCopyAnalysis : public ArrayCopyAnalysisBase {
138 public:
139 MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysis)
// Forwards to the base with optimized=false: conservative, no attribute pruning.
141 ArrayCopyAnalysis(mlir::Operation *op)
142 : ArrayCopyAnalysisBase(op, /*optimized=*/false) {}
144 } // namespace
146 namespace {
147 /// Helper class to collect all array operations that produced an array value.
148 class ReachCollector {
149 public:
// `reach` is the caller-owned output vector; `loopRegion` (may be null) is the
// region of the loop nest whose stores are considered "inside" the computation.
150 ReachCollector(llvm::SmallVectorImpl<mlir::Operation *> &reach,
151 mlir::Region *loopRegion)
152 : reach{reach}, loopRegion{loopRegion} {}
// Dispatch over a range of values; an empty range means inspect `op` itself
// (with a null value) rather than its results.
154 void collectArrayMentionFrom(mlir::Operation *op, mlir::ValueRange range) {
155 if (range.empty()) {
156 collectArrayMentionFrom(op, mlir::Value{});
157 return;
159 for (mlir::Value v : range)
160 collectArrayMentionFrom(v);
163 // Collect all the array_access ops in `block`. This recursively looks into
164 // blocks in ops with regions.
165 // FIXME: This is temporarily relying on the array_amend appearing in a
166 // do_loop Region. This phase ordering assumption can be eliminated by using
167 // dominance information to find the array_access ops or by scanning the
168 // transitive closure of the amending array_access's users and the defs that
169 // reach them.
170 void collectAccesses(llvm::SmallVector<ArrayAccessOp> &result,
171 mlir::Block *block) {
172 for (auto &op : *block) {
173 if (auto access = mlir::dyn_cast<ArrayAccessOp>(op)) {
174 LLVM_DEBUG(llvm::dbgs() << "adding access: " << access << '\n');
175 result.push_back(access);
176 continue;
178 for (auto &region : op.getRegions())
179 for (auto &bb : region.getBlocks())
180 collectAccesses(result, &bb);
// Core dispatch: classify the defining op of `val` and either record it as a
// reachable array mention or recurse through its region/operands.
184 void collectArrayMentionFrom(mlir::Operation *op, mlir::Value val) {
185 // `val` is defined by an Op, process the defining Op.
186 // If `val` is defined by a region containing Op, we want to drill down
187 // and through that Op's region(s).
188 LLVM_DEBUG(llvm::dbgs() << "popset: " << *op << '\n');
// For region-holding ops: map the result number back to the source values
// inside the region and recurse on each.
189 auto popFn = [&](auto rop) {
190 assert(val && "op must have a result value");
191 auto resNum = val.cast<mlir::OpResult>().getResultNumber();
192 llvm::SmallVector<mlir::Value> results;
193 rop.resultToSourceOps(results, resNum);
194 for (auto u : results)
195 collectArrayMentionFrom(u);
197 if (auto rop = mlir::dyn_cast<DoLoopOp>(op)) {
198 popFn(rop);
199 return;
201 if (auto rop = mlir::dyn_cast<IterWhileOp>(op)) {
202 popFn(rop);
203 return;
205 if (auto rop = mlir::dyn_cast<fir::IfOp>(op)) {
206 popFn(rop);
207 return;
// An embox aliases its memref: chase all other users of that memref.
209 if (auto box = mlir::dyn_cast<EmboxOp>(op)) {
210 for (auto *user : box.getMemref().getUsers())
211 if (user != op)
212 collectArrayMentionFrom(user, user->getResults());
213 return;
215 if (auto mergeStore = mlir::dyn_cast<ArrayMergeStoreOp>(op)) {
216 if (opIsInsideLoops(mergeStore))
217 collectArrayMentionFrom(mergeStore.getSequence());
218 return;
221 if (mlir::isa<AllocaOp, AllocMemOp>(op)) {
222 // Look for any stores inside the loops, and collect an array operation
223 // that produced the value being stored to it.
224 for (auto *user : op->getUsers())
225 if (auto store = mlir::dyn_cast<fir::StoreOp>(user))
226 if (opIsInsideLoops(store))
227 collectArrayMentionFrom(store.getValue());
228 return;
231 // Scan the uses of amend's memref
232 if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op)) {
233 reach.push_back(op);
234 llvm::SmallVector<ArrayAccessOp> accesses;
235 collectAccesses(accesses, op->getBlock());
236 for (auto access : accesses)
237 collectArrayMentionFrom(access.getResult());
240 // Otherwise, Op does not contain a region so just chase its operands.
241 if (mlir::isa<ArrayAccessOp, ArrayLoadOp, ArrayUpdateOp, ArrayModifyOp,
242 ArrayFetchOp>(op)) {
243 LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n");
244 reach.push_back(op);
247 // Include all array_access ops using an array_load.
248 if (auto arrLd = mlir::dyn_cast<ArrayLoadOp>(op))
249 for (auto *user : arrLd.getResult().getUsers())
250 if (mlir::isa<ArrayAccessOp>(user)) {
251 LLVM_DEBUG(llvm::dbgs() << "add " << *user << " to reachable set\n");
252 reach.push_back(user);
255 // Array modify assignment is performed on the result. So the analysis must
256 // look at what is done with the result.
257 if (mlir::isa<ArrayModifyOp>(op))
258 for (auto *user : op->getResult(0).getUsers())
259 followUsers(user);
// Calls are recorded conservatively; conservativeCallConflict() inspects them.
261 if (mlir::isa<fir::CallOp>(op)) {
262 LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n");
263 reach.push_back(op);
266 for (auto u : op->getOperands())
267 collectArrayMentionFrom(u);
// Trace a block argument back to the value(s) that feed it.
270 void collectArrayMentionFrom(mlir::BlockArgument ba) {
271 auto *parent = ba.getOwner()->getParentOp();
272 // If inside an Op holding a region, the block argument corresponds to an
273 // argument passed to the containing Op.
274 auto popFn = [&](auto rop) {
275 collectArrayMentionFrom(rop.blockArgToSourceOp(ba.getArgNumber()));
277 if (auto rop = mlir::dyn_cast<DoLoopOp>(parent)) {
278 popFn(rop);
279 return;
281 if (auto rop = mlir::dyn_cast<IterWhileOp>(parent)) {
282 popFn(rop);
283 return;
285 // Otherwise, a block argument is provided via the pred blocks.
286 for (auto *pred : ba.getOwner()->getPredecessors()) {
287 auto u = pred->getTerminator()->getOperand(ba.getArgNumber());
288 collectArrayMentionFrom(u);
292 // Recursively trace operands to find all array operations relating to the
293 // values merged.
// The `visited` set guards against cycles (e.g. loop-carried values).
294 void collectArrayMentionFrom(mlir::Value val) {
295 if (!val || visited.contains(val))
296 return;
297 visited.insert(val);
299 // Process a block argument.
300 if (auto ba = val.dyn_cast<mlir::BlockArgument>()) {
301 collectArrayMentionFrom(ba);
302 return;
305 // Process an Op.
306 if (auto *op = val.getDefiningOp()) {
307 collectArrayMentionFrom(op, val);
308 return;
// A value must be either a block argument or an op result; anything else
// indicates a broken invariant, so abort compilation.
311 emitFatalError(val.getLoc(), "unhandled value");
314 /// Return all ops that produce the array value that is stored into the
315 /// `array_merge_store`.
316 static void reachingValues(llvm::SmallVectorImpl<mlir::Operation *> &reach,
317 mlir::Value seq) {
318 reach.clear();
319 mlir::Region *loopRegion = nullptr;
// If the stored sequence comes out of a do_loop, that loop's body region is
// the "inside the loops" region for opIsInsideLoops().
320 if (auto doLoop = mlir::dyn_cast_or_null<DoLoopOp>(seq.getDefiningOp()))
321 loopRegion = &doLoop->getRegion(0);
322 ReachCollector collector(reach, loopRegion);
323 collector.collectArrayMentionFrom(seq);
326 private:
327 /// Is \op inside the loop nest region ?
328 /// FIXME: replace this structural dependence with graph properties.
329 bool opIsInsideLoops(mlir::Operation *op) const {
330 auto *region = op->getParentRegion();
331 while (region) {
332 if (region == loopRegion)
333 return true;
334 region = region->getParentRegion();
336 return false;
339 /// Recursively trace the use of an operation results, calling
340 /// collectArrayMentionFrom on the direct and indirect user operands.
341 void followUsers(mlir::Operation *op) {
342 for (auto userOperand : op->getOperands())
343 collectArrayMentionFrom(userOperand);
344 // Go through potential converts/coordinate_op.
345 for (auto indirectUser : op->getUsers())
346 followUsers(indirectUser);
349 llvm::SmallVectorImpl<mlir::Operation *> &reach;
// Values already traced; prevents infinite recursion on cyclic use chains.
350 llvm::SmallPtrSet<mlir::Value, 16> visited;
351 /// Region of the loops nest that produced the array value.
352 mlir::Region *loopRegion;
354 } // namespace
356 /// Find all the array operations that access the array value that is loaded by
357 /// the array load operation, `load`.
/// Results are memoized in `loadMapSets`: a repeat query replays the cached
/// OpOperand set instead of re-walking the use-def web.
358 void ArrayCopyAnalysisBase::arrayMentions(
359 llvm::SmallVectorImpl<mlir::Operation *> &mentions, ArrayLoadOp load) {
360 mentions.clear();
361 auto lmIter = loadMapSets.find(load);
362 if (lmIter != loadMapSets.end()) {
363 for (auto *opnd : lmIter->second) {
364 auto *owner = opnd->getOwner();
365 if (mlir::isa<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp, ArrayUpdateOp,
366 ArrayModifyOp>(owner))
367 mentions.push_back(owner);
369 return;
372 UseSetT visited;
373 llvm::SmallVector<mlir::OpOperand *> queue; // uses of ArrayLoad[orig]
// Enqueue every not-yet-seen use of `val` for the worklist below.
375 auto appendToQueue = [&](mlir::Value val) {
376 for (auto &use : val.getUses())
377 if (!visited.count(&use)) {
378 visited.insert(&use);
379 queue.push_back(&use);
383 // Build the set of uses of `original`.
384 // let USES = { uses of original fir.load }
385 appendToQueue(load);
387 // Process the worklist until done.
388 while (!queue.empty()) {
389 mlir::OpOperand *operand = queue.pop_back_val();
390 mlir::Operation *owner = operand->getOwner();
391 if (!owner)
392 continue;
// For do_loop/iterate_while: thread the value through the iter-arg into the
// loop body (block argument) and out through the corresponding result.
393 auto structuredLoop = [&](auto ro) {
394 if (auto blockArg = ro.iterArgToBlockArg(operand->get())) {
395 int64_t arg = blockArg.getArgNumber();
396 mlir::Value output = ro.getResult(ro.getFinalValue() ? arg : arg - 1);
397 appendToQueue(output);
398 appendToQueue(blockArg);
401 // TODO: this needs to be updated to use the control-flow interface.
402 auto branchOp = [&](mlir::Block *dest, OperandRange operands) {
403 if (operands.empty())
404 return;
406 // Check if this operand is within the range.
407 unsigned operandIndex = operand->getOperandNumber();
408 unsigned operandsStart = operands.getBeginOperandIndex();
409 if (operandIndex < operandsStart ||
410 operandIndex >= (operandsStart + operands.size()))
411 return;
413 // Index the successor.
414 unsigned argIndex = operandIndex - operandsStart;
415 appendToQueue(dest->getArgument(argIndex));
417 // Thread uses into structured loop bodies and return value uses.
418 if (auto ro = mlir::dyn_cast<DoLoopOp>(owner)) {
419 structuredLoop(ro);
420 } else if (auto ro = mlir::dyn_cast<IterWhileOp>(owner)) {
421 structuredLoop(ro);
422 } else if (auto rs = mlir::dyn_cast<ResultOp>(owner)) {
423 // Thread any uses of fir.if that return the marked array value.
424 mlir::Operation *parent = rs->getParentRegion()->getParentOp();
425 if (auto ifOp = mlir::dyn_cast<fir::IfOp>(parent))
426 appendToQueue(ifOp.getResult(operand->getOperandNumber()));
427 } else if (mlir::isa<ArrayFetchOp>(owner)) {
428 // Keep track of array value fetches.
429 LLVM_DEBUG(llvm::dbgs()
430 << "add fetch {" << *owner << "} to array value set\n");
431 mentions.push_back(owner);
432 } else if (auto update = mlir::dyn_cast<ArrayUpdateOp>(owner)) {
433 // Keep track of array value updates and thread the return value uses.
434 LLVM_DEBUG(llvm::dbgs()
435 << "add update {" << *owner << "} to array value set\n");
436 mentions.push_back(owner);
437 appendToQueue(update.getResult());
438 } else if (auto update = mlir::dyn_cast<ArrayModifyOp>(owner)) {
439 // Keep track of array value modification and thread the return value
440 // uses.
441 LLVM_DEBUG(llvm::dbgs()
442 << "add modify {" << *owner << "} to array value set\n");
443 mentions.push_back(owner);
444 appendToQueue(update.getResult(1));
445 } else if (auto mention = mlir::dyn_cast<ArrayAccessOp>(owner)) {
446 mentions.push_back(owner);
447 } else if (auto amend = mlir::dyn_cast<ArrayAmendOp>(owner)) {
448 mentions.push_back(owner);
449 appendToQueue(amend.getResult());
450 } else if (auto br = mlir::dyn_cast<mlir::cf::BranchOp>(owner)) {
451 branchOp(br.getDest(), br.getDestOperands());
452 } else if (auto br = mlir::dyn_cast<mlir::cf::CondBranchOp>(owner)) {
453 branchOp(br.getTrueDest(), br.getTrueOperands());
454 branchOp(br.getFalseDest(), br.getFalseOperands());
455 } else if (mlir::isa<ArrayMergeStoreOp>(owner)) {
456 // do nothing
457 } else {
// Any other consumer of the array value is unexpected; fail loudly rather
// than silently miss a mention (which could drop a required copy).
458 llvm::report_fatal_error("array value reached unexpected op");
// Cache the full transitive use set for subsequent queries (and getLoadUseSet).
461 loadMapSets.insert({load, visited});
// Return true iff `type` is a POINTER-like type, looking through one level of
// !fir.box wrapping to test the boxed element type.
464 static bool hasPointerType(mlir::Type type) {
465 if (auto boxTy = type.dyn_cast<BoxType>())
466 type = boxTy.getEleTy();
467 return type.isa<fir::PointerType>();
470 // This is a NF performance hack. It makes a simple test that the slices of the
471 // load, \p ld, and the merge store, \p st, are trivially mutually exclusive.
// Returns true only when the two slices provably never touch the same element;
// any doubt (subobject slices, vector indices, non-constant offsets) returns
// false so that the conservative copy is kept.
472 static bool mutuallyExclusiveSliceRange(ArrayLoadOp ld, ArrayMergeStoreOp st) {
473 // If the same array_load, then no further testing is warranted.
474 if (ld.getResult() == st.getOriginal())
475 return false;
477 auto getSliceOp = [](mlir::Value val) -> SliceOp {
478 if (!val)
479 return {};
480 auto sliceOp = mlir::dyn_cast_or_null<SliceOp>(val.getDefiningOp());
481 if (!sliceOp)
482 return {};
483 return sliceOp;
486 auto ldSlice = getSliceOp(ld.getSlice());
487 auto stSlice = getSliceOp(st.getSlice());
488 if (!ldSlice || !stSlice)
489 return false;
491 // Resign on subobject slices.
492 if (!ldSlice.getFields().empty() || !stSlice.getFields().empty() ||
493 !ldSlice.getSubstr().empty() || !stSlice.getSubstr().empty())
494 return false;
496 // Crudely test that the two slices do not overlap by looking for the
497 // following general condition. If the slices look like (i:j) and (j+1:k) then
498 // these ranges do not overlap. The addend must be a constant.
499 auto ldTriples = ldSlice.getTriples();
500 auto stTriples = stSlice.getTriples();
501 const auto size = ldTriples.size();
502 if (size != stTriples.size())
503 return false;
// True iff v2 == v1 + c (or v1 == v2 + c) for some positive constant c,
// looking through fir.convert ops on either side.
505 auto displacedByConstant = [](mlir::Value v1, mlir::Value v2) {
506 auto removeConvert = [](mlir::Value v) -> mlir::Operation * {
507 auto *op = v.getDefiningOp();
508 while (auto conv = mlir::dyn_cast_or_null<ConvertOp>(op))
509 op = conv.getValue().getDefiningOp();
510 return op;
513 auto isPositiveConstant = [](mlir::Value v) -> bool {
514 if (auto conOp =
515 mlir::dyn_cast<mlir::arith::ConstantOp>(v.getDefiningOp()))
516 if (auto iattr = conOp.getValue().dyn_cast<mlir::IntegerAttr>())
517 return iattr.getInt() > 0;
518 return false;
521 auto *op1 = removeConvert(v1);
522 auto *op2 = removeConvert(v2);
523 if (!op1 || !op2)
524 return false;
525 if (auto addi = mlir::dyn_cast<mlir::arith::AddIOp>(op2))
526 if ((addi.getLhs().getDefiningOp() == op1 &&
527 isPositiveConstant(addi.getRhs())) ||
528 (addi.getRhs().getDefiningOp() == op1 &&
529 isPositiveConstant(addi.getLhs())))
530 return true;
531 if (auto subi = mlir::dyn_cast<mlir::arith::SubIOp>(op1))
532 if (subi.getLhs().getDefiningOp() == op2 &&
533 isPositiveConstant(subi.getRhs()))
534 return true;
535 return false;
// Triples come in (lb, ub, stride) groups of three per dimension.
538 for (std::remove_const_t<decltype(size)> i = 0; i < size; i += 3) {
539 // If both are loop invariant, skip to the next triple.
540 if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i + 1].getDefiningOp()) &&
541 mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i + 1].getDefiningOp())) {
542 // Unless either is a vector index, then be conservative.
543 if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i].getDefiningOp()) ||
544 mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i].getDefiningOp()))
545 return false;
546 continue;
548 // If identical, skip to the next triple.
549 if (ldTriples[i] == stTriples[i] && ldTriples[i + 1] == stTriples[i + 1] &&
550 ldTriples[i + 2] == stTriples[i + 2])
551 continue;
552 // If ubound and lbound are the same with a constant offset, skip to the
553 // next triple.
554 if (displacedByConstant(ldTriples[i + 1], stTriples[i]) ||
555 displacedByConstant(stTriples[i + 1], ldTriples[i]))
556 continue;
557 return false;
559 LLVM_DEBUG(llvm::dbgs() << "detected non-overlapping slice ranges on " << ld
560 << " and " << st << ", which is not a conflict\n");
561 return true;
564 /// Is there a conflict between the array value that was updated and to be
565 /// stored to `st` and the set of arrays loaded (`reach`) and used to compute
566 /// the updated value?
567 /// If `optimize` is true, use the variable attributes to prove that
568 /// there is no conflict.
569 static bool conflictOnLoad(llvm::ArrayRef<mlir::Operation *> reach,
570 ArrayMergeStoreOp st, bool optimize) {
// Tracks the single load of the stored address seen so far; a second load of
// the same address is conservatively treated as a conflict (see TODO below).
571 mlir::Value load;
572 mlir::Value addr = st.getMemref();
573 const bool storeHasPointerType = hasPointerType(addr.getType());
574 for (auto *op : reach)
575 if (auto ld = mlir::dyn_cast<ArrayLoadOp>(op)) {
576 mlir::Type ldTy = ld.getMemref().getType();
577 if (ld.getMemref() == addr) {
578 if (mutuallyExclusiveSliceRange(ld, st))
579 continue;
580 if (ld.getResult() != st.getOriginal())
581 return true;
582 if (load) {
583 // TODO: extend this to allow checking if the first `load` and this
584 // `ld` are mutually exclusive accesses but not identical.
585 return true;
587 load = ld;
588 } else if (storeHasPointerType) {
// Store target is a POINTER: any loaded array may alias it unless the
// optimized analysis can show the load is neither a pointer nor TARGET.
589 if (optimize && !hasPointerType(ldTy) &&
590 !valueMayHaveFirAttributes(
591 ld.getMemref(),
592 {getTargetAttrName(), GlobalOp::getTargetAttrNameStr()}))
593 continue;
595 return true;
596 } else if (hasPointerType(ldTy)) {
// Symmetric case: the load is a POINTER and may alias the store target
// unless the target is provably neither a pointer nor TARGET.
597 if (optimize && !storeHasPointerType &&
598 !valueMayHaveFirAttributes(
599 addr, {getTargetAttrName(), GlobalOp::getTargetAttrNameStr()}))
600 continue;
602 return true;
604 // TODO: Check if types can also allow ruling out some cases. For now,
605 // the fact that equivalences is using pointer attribute to enforce
606 // aliasing is preventing any attempt to do so, and in general, it may
607 // be wrong to use this if any of the types is a complex or a derived
608 // for which it is possible to create a pointer to a part with a
609 // different type than the whole, although this deserve some more
610 // investigation because existing compiler behavior seem to diverge
611 // here.
613 return false;
616 /// Is there an access vector conflict on the array being merged into? If the
617 /// access vectors diverge, then assume that there are potentially overlapping
618 /// loop-carried references.
/// Returns true when two mentions use different index vectors; if all index
/// vectors agree, a conflict is still reported when the value is used both
/// by-value and by-reference (valSeen && refSeen).
619 static bool conflictOnMerge(llvm::ArrayRef<mlir::Operation *> mentions) {
620 if (mentions.size() < 2)
621 return false;
// First index vector encountered; all later mentions are compared against it.
622 llvm::SmallVector<mlir::Value> indices;
623 LLVM_DEBUG(llvm::dbgs() << "check merge conflict on with " << mentions.size()
624 << " mentions on the list\n");
625 bool valSeen = false;
626 bool refSeen = false;
627 for (auto *op : mentions) {
628 llvm::SmallVector<mlir::Value> compareVector;
629 if (auto u = mlir::dyn_cast<ArrayUpdateOp>(op)) {
630 valSeen = true;
631 if (indices.empty()) {
632 indices = u.getIndices();
633 continue;
635 compareVector = u.getIndices();
636 } else if (auto f = mlir::dyn_cast<ArrayModifyOp>(op)) {
637 valSeen = true;
638 if (indices.empty()) {
639 indices = f.getIndices();
640 continue;
642 compareVector = f.getIndices();
643 } else if (auto f = mlir::dyn_cast<ArrayFetchOp>(op)) {
644 valSeen = true;
645 if (indices.empty()) {
646 indices = f.getIndices();
647 continue;
649 compareVector = f.getIndices();
650 } else if (auto f = mlir::dyn_cast<ArrayAccessOp>(op)) {
651 refSeen = true;
652 if (indices.empty()) {
653 indices = f.getIndices();
654 continue;
656 compareVector = f.getIndices();
657 } else if (mlir::isa<ArrayAmendOp>(op)) {
658 refSeen = true;
659 continue;
660 } else {
661 mlir::emitError(op->getLoc(), "unexpected operation in analysis");
// Element-wise comparison: any length or value mismatch is a conflict.
663 if (compareVector.size() != indices.size() ||
664 llvm::any_of(llvm::zip(compareVector, indices), [&](auto pair) {
665 return std::get<0>(pair) != std::get<1>(pair);
667 return true;
668 LLVM_DEBUG(llvm::dbgs() << "vectors compare equal\n");
670 return valSeen && refSeen;
673 /// With element-by-reference semantics, an amended array with more than one
674 /// access to the same loaded array is conservatively considered a conflict.
675 /// Note: the array copy can still be eliminated in subsequent optimizations.
676 static bool conflictOnReference(llvm::ArrayRef<mlir::Operation *> mentions) {
677 LLVM_DEBUG(llvm::dbgs() << "checking reference semantics " << mentions.size()
678 << '\n');
// Fewer than three mentions cannot produce any of the patterns tested below.
679 if (mentions.size() < 3)
680 return false;
681 unsigned amendCount = 0;
682 unsigned accessCount = 0;
683 for (auto *op : mentions) {
684 if (mlir::isa<ArrayAmendOp>(op) && ++amendCount > 1) {
685 LLVM_DEBUG(llvm::dbgs() << "conflict: multiple amends of array value\n");
686 return true;
688 if (mlir::isa<ArrayAccessOp>(op) && ++accessCount > 1) {
689 LLVM_DEBUG(llvm::dbgs()
690 << "conflict: multiple accesses of array value\n");
691 return true;
693 if (mlir::isa<ArrayFetchOp, ArrayUpdateOp, ArrayModifyOp>(op)) {
694 LLVM_DEBUG(llvm::dbgs()
695 << "conflict: array value has both uses by-value and uses "
696 "by-reference. conservative assumption.\n");
697 return true;
700 return false;
// Return the array_access op feeding the first array_amend in `mentions`,
// or null when no amend is present (or its memref has no defining op).
703 static mlir::Operation *
704 amendingAccess(llvm::ArrayRef<mlir::Operation *> mentions) {
705 for (auto *op : mentions)
706 if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op))
707 return amend.getMemref().getDefiningOp();
708 return {};
711 // Are any conflicts present? The conflicts detected here are described above.
// Note: conflictOnReference() is intentionally NOT folded in here; the caller
// (construct) evaluates it separately.
712 static bool conflictDetected(llvm::ArrayRef<mlir::Operation *> reach,
713 llvm::ArrayRef<mlir::Operation *> mentions,
714 ArrayMergeStoreOp st, bool optimize) {
715 return conflictOnLoad(reach, st, optimize) || conflictOnMerge(mentions);
718 // Assume that any call to a function that uses host-associations will be
719 // modifying the output array.
720 static bool
721 conservativeCallConflict(llvm::ArrayRef<mlir::Operation *> reaches) {
722 return llvm::any_of(reaches, [](mlir::Operation *op) {
723 if (auto call = mlir::dyn_cast<fir::CallOp>(op))
// Only direct (symbol-based) callees can be inspected; indirect calls fall
// through to `return false` below.
724 if (auto callee =
725 call.getCallableForCallee().dyn_cast<mlir::SymbolRefAttr>()) {
726 auto module = op->getParentOfType<mlir::ModuleOp>();
// NOTE(review): "isInternalPorcedure" looks like a typo for
// "isInternalProcedure", but the spelling must match the helper declared in
// the FIR support headers — confirm against FIROpsSupport.h before renaming.
727 return isInternalPorcedure(
728 module.lookupSymbol<mlir::func::FuncOp>(callee));
730 return false;
734 /// Constructor of the array copy analysis.
735 /// This performs the analysis and saves the intermediate results.
/// Walks every op under `topLevelOp`; for each array_merge_store it computes
/// reaching values and mentions, records conflicts, and maps the original
/// array_load to the store. For each array_load it maps every mention back to
/// the load.
736 void ArrayCopyAnalysisBase::construct(mlir::Operation *topLevelOp) {
737 topLevelOp->walk([&](Operation *op) {
738 if (auto st = mlir::dyn_cast<fir::ArrayMergeStoreOp>(op)) {
739 llvm::SmallVector<mlir::Operation *> values;
740 ReachCollector::reachingValues(values, st.getSequence());
741 bool callConflict = conservativeCallConflict(values);
742 llvm::SmallVector<mlir::Operation *> mentions;
743 arrayMentions(mentions,
744 mlir::cast<ArrayLoadOp>(st.getOriginal().getDefiningOp()));
745 bool conflict = conflictDetected(values, mentions, st, optimizeConflicts);
746 bool refConflict = conflictOnReference(mentions);
747 if (callConflict || conflict || refConflict) {
748 LLVM_DEBUG(llvm::dbgs()
749 << "CONFLICT: copies required for " << st << '\n'
750 << " adding conflicts on: " << *op << " and "
751 << st.getOriginal() << '\n');
// Both the merge_store and the originating array_load are marked so the
// rewrite phase can materialize the temporary copy.
752 conflicts.insert(op);
753 conflicts.insert(st.getOriginal().getDefiningOp());
754 if (auto *access = amendingAccess(mentions))
755 amendAccesses.insert(access);
757 auto *ld = st.getOriginal().getDefiningOp();
758 LLVM_DEBUG(llvm::dbgs()
759 << "map: adding {" << *ld << " -> " << st << "}\n");
760 useMap.insert({ld, op});
761 } else if (auto load = mlir::dyn_cast<ArrayLoadOp>(op)) {
762 llvm::SmallVector<mlir::Operation *> mentions;
763 arrayMentions(mentions, load);
764 LLVM_DEBUG(llvm::dbgs() << "process load: " << load
765 << ", mentions: " << mentions.size() << '\n');
766 for (auto *acc : mentions) {
767 LLVM_DEBUG(llvm::dbgs() << " mention: " << *acc << '\n');
768 if (mlir::isa<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp, ArrayUpdateOp,
769 ArrayModifyOp>(acc)) {
// A mention already mapped to another load means two merge_stores share an
// access chain, which this pass cannot model; report and keep going.
770 if (useMap.count(acc)) {
771 mlir::emitError(
772 load.getLoc(),
773 "The parallel semantics of multiple array_merge_stores per "
774 "array_load are not supported.");
775 continue;
777 LLVM_DEBUG(llvm::dbgs()
778 << "map: adding {" << *acc << "} -> {" << load << "}\n");
779 useMap.insert({acc, op});
786 //===----------------------------------------------------------------------===//
787 // Conversions for converting out of array value form.
788 //===----------------------------------------------------------------------===//
790 namespace {
// Rewrite pattern: once all array ops have been lowered, every remaining
// array_load result is dead, so replace it with an undef of the same type.
791 class ArrayLoadConversion : public mlir::OpRewritePattern<ArrayLoadOp> {
792 public:
793 using OpRewritePattern::OpRewritePattern;
795 mlir::LogicalResult
796 matchAndRewrite(ArrayLoadOp load,
797 mlir::PatternRewriter &rewriter) const override {
798 LLVM_DEBUG(llvm::dbgs() << "replace load " << load << " with undef.\n");
799 rewriter.replaceOpWithNewOp<UndefOp>(load, load.getType());
800 return mlir::success();
// Rewrite pattern: array_merge_store carries no results and its semantics have
// already been materialized by the copy insertion, so simply erase it.
804 class ArrayMergeStoreConversion
805 : public mlir::OpRewritePattern<ArrayMergeStoreOp> {
806 public:
807 using OpRewritePattern::OpRewritePattern;
809 mlir::LogicalResult
810 matchAndRewrite(ArrayMergeStoreOp store,
811 mlir::PatternRewriter &rewriter) const override {
812 LLVM_DEBUG(llvm::dbgs() << "marking store " << store << " as dead.\n");
813 rewriter.eraseOp(store);
814 return mlir::success();
817 } // namespace
// Return a !fir.ref to the element type of `ty` after stripping any
// pass-by-reference wrapper and sequence (array) type.
819 static mlir::Type getEleTy(mlir::Type ty) {
820 auto eleTy = unwrapSequenceType(unwrapPassByRefType(ty));
821 // FIXME: keep ptr/heap/ref information.
822 return ReferenceType::get(eleTy);
825 // Extract extents from the ShapeOp/ShapeShiftOp into the result vector.
// Returns true when the array is assumed-size and the last extent had to be
// recomputed from the slice triple (i.e. the copy must honor the slice).
826 static bool getAdjustedExtents(mlir::Location loc,
827 mlir::PatternRewriter &rewriter,
828 ArrayLoadOp arrLoad,
829 llvm::SmallVectorImpl<mlir::Value> &result,
830 mlir::Value shape) {
831 bool copyUsingSlice = false;
832 auto *shapeOp = shape.getDefiningOp();
833 if (auto s = mlir::dyn_cast_or_null<ShapeOp>(shapeOp)) {
834 auto e = s.getExtents();
835 result.insert(result.end(), e.begin(), e.end());
836 } else if (auto s = mlir::dyn_cast_or_null<ShapeShiftOp>(shapeOp)) {
837 auto e = s.getExtents();
838 result.insert(result.end(), e.begin(), e.end());
839 } else {
// The shape operand must come from fir.shape or fir.shape_shift here.
840 emitFatalError(loc, "not a fir.shape/fir.shape_shift op");
842 auto idxTy = rewriter.getIndexType();
843 if (factory::isAssumedSize(result)) {
844 // Use slice information to compute the extent of the column.
845 auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
846 mlir::Value size = one;
847 if (mlir::Value sliceArg = arrLoad.getSlice()) {
848 if (auto sliceOp =
849 mlir::dyn_cast_or_null<SliceOp>(sliceArg.getDefiningOp())) {
850 auto triples = sliceOp.getTriples();
851 const std::size_t tripleSize = triples.size();
852 auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
853 fir::KindMapping kindMap = getKindMapping(module);
854 FirOpBuilder builder(rewriter, kindMap);
// The last (lb, ub, stride) triple describes the final dimension; derive
// its extent from the triplet.
855 size = builder.genExtentFromTriplet(loc, triples[tripleSize - 3],
856 triples[tripleSize - 2],
857 triples[tripleSize - 1], idxTy);
858 copyUsingSlice = true;
// Overwrite the assumed-size placeholder extent of the last dimension.
861 result[result.size() - 1] = size;
863 return copyUsingSlice;
866 /// Place the extents of the array load, \p arrLoad, into \p result and
867 /// return a ShapeOp or ShapeShiftOp with the same extents. If \p arrLoad is
868 /// loading a `!fir.box`, code will be generated to read the extents from the
869 /// boxed value, and the returned shape Op will be built with the extents read
870 /// from the box. Otherwise, the extents will be extracted from the ShapeOp (or
871 /// ShapeShiftOp) argument of \p arrLoad. \p copyUsingSlice will be set to true
872 /// if slicing of the output array is to be done in the copy-in/copy-out rather
873 /// than in the elemental computation step.
874 static mlir::Value getOrReadExtentsAndShapeOp(
875 mlir::Location loc, mlir::PatternRewriter &rewriter, ArrayLoadOp arrLoad,
876 llvm::SmallVectorImpl<mlir::Value> &result, bool &copyUsingSlice) {
877 assert(result.empty());
878 if (arrLoad->hasAttr(fir::getOptionalAttrName()))
879 fir::emitFatalError(
880 loc, "shapes from array load of OPTIONAL arrays must not be used");
881 if (auto boxTy = arrLoad.getMemref().getType().dyn_cast<BoxType>()) {
882 auto rank =
883 dyn_cast_ptrOrBoxEleTy(boxTy).cast<SequenceType>().getDimension();
884 auto idxTy = rewriter.getIndexType();
// Read each dimension's extent out of the box descriptor (result 1 of
// fir.box_dims is the extent).
885 for (decltype(rank) dim = 0; dim < rank; ++dim) {
886 auto dimVal = rewriter.create<mlir::arith::ConstantIndexOp>(loc, dim);
887 auto dimInfo = rewriter.create<BoxDimsOp>(loc, idxTy, idxTy, idxTy,
888 arrLoad.getMemref(), dimVal);
889 result.emplace_back(dimInfo.getResult(1));
891 if (!arrLoad.getShape()) {
892 auto shapeType = ShapeType::get(rewriter.getContext(), rank);
893 return rewriter.create<ShapeOp>(loc, shapeType, result);
// A shape operand on a boxed load must be a fir.shift holding the lower
// bounds; pair each origin with the extent read from the box.
895 auto shiftOp = arrLoad.getShape().getDefiningOp<ShiftOp>();
896 auto shapeShiftType = ShapeShiftType::get(rewriter.getContext(), rank);
897 llvm::SmallVector<mlir::Value> shapeShiftOperands;
898 for (auto [lb, extent] : llvm::zip(shiftOp.getOrigins(), result)) {
899 shapeShiftOperands.push_back(lb);
900 shapeShiftOperands.push_back(extent);
902 return rewriter.create<ShapeShiftOp>(loc, shapeShiftType,
903 shapeShiftOperands);
// Non-box case: extents come straight from the load's shape operand.
905 copyUsingSlice =
906 getAdjustedExtents(loc, rewriter, arrLoad, result, arrLoad.getShape());
907 return arrLoad.getShape();
910 static mlir::Type toRefType(mlir::Type ty) {
911 if (fir::isa_ref_type(ty))
912 return ty;
913 return fir::ReferenceType::get(ty);
916 static llvm::SmallVector<mlir::Value>
917 getTypeParamsIfRawData(mlir::Location loc, FirOpBuilder &builder,
918 ArrayLoadOp arrLoad, mlir::Type ty) {
919 if (ty.isa<BoxType>())
920 return {};
921 return fir::factory::getTypeParams(loc, builder, arrLoad);
924 static mlir::Value genCoorOp(mlir::PatternRewriter &rewriter,
925 mlir::Location loc, mlir::Type eleTy,
926 mlir::Type resTy, mlir::Value alloc,
927 mlir::Value shape, mlir::Value slice,
928 mlir::ValueRange indices, ArrayLoadOp load,
929 bool skipOrig = false) {
930 llvm::SmallVector<mlir::Value> originated;
931 if (skipOrig)
932 originated.assign(indices.begin(), indices.end());
933 else
934 originated = factory::originateIndices(loc, rewriter, alloc.getType(),
935 shape, indices);
936 auto seqTy = dyn_cast_ptrOrBoxEleTy(alloc.getType());
937 assert(seqTy && seqTy.isa<SequenceType>());
938 const auto dimension = seqTy.cast<SequenceType>().getDimension();
939 auto module = load->getParentOfType<mlir::ModuleOp>();
940 fir::KindMapping kindMap = getKindMapping(module);
941 FirOpBuilder builder(rewriter, kindMap);
942 auto typeparams = getTypeParamsIfRawData(loc, builder, load, alloc.getType());
943 mlir::Value result = rewriter.create<ArrayCoorOp>(
944 loc, eleTy, alloc, shape, slice,
945 llvm::ArrayRef<mlir::Value>{originated}.take_front(dimension),
946 typeparams);
947 if (dimension < originated.size())
948 result = rewriter.create<fir::CoordinateOp>(
949 loc, resTy, result,
950 llvm::ArrayRef<mlir::Value>{originated}.drop_front(dimension));
951 return result;
954 static mlir::Value getCharacterLen(mlir::Location loc, FirOpBuilder &builder,
955 ArrayLoadOp load, CharacterType charTy) {
956 auto charLenTy = builder.getCharacterLengthType();
957 if (charTy.hasDynamicLen()) {
958 if (load.getMemref().getType().isa<BoxType>()) {
959 // The loaded array is an emboxed value. Get the CHARACTER length from
960 // the box value.
961 auto eleSzInBytes =
962 builder.create<BoxEleSizeOp>(loc, charLenTy, load.getMemref());
963 auto kindSize =
964 builder.getKindMap().getCharacterBitsize(charTy.getFKind());
965 auto kindByteSize =
966 builder.createIntegerConstant(loc, charLenTy, kindSize / 8);
967 return builder.create<mlir::arith::DivSIOp>(loc, eleSzInBytes,
968 kindByteSize);
970 // The loaded array is a (set of) unboxed values. If the CHARACTER's
971 // length is not a constant, it must be provided as a type parameter to
972 // the array_load.
973 auto typeparams = load.getTypeparams();
974 assert(typeparams.size() > 0 && "expected type parameters on array_load");
975 return typeparams.back();
977 // The typical case: the length of the CHARACTER is a compile-time
978 // constant that is encoded in the type information.
979 return builder.createIntegerConstant(loc, charLenTy, charTy.getLen());
981 /// Generate a shallow array copy. This is used for both copy-in and copy-out.
/// \p CopyIn selects which side the slice applies to when the copy itself is
/// sliced: the source (\p src) on copy-in, the destination (\p dst) on
/// copy-out.
982 template <bool CopyIn>
983 void genArrayCopy(mlir::Location loc, mlir::PatternRewriter &rewriter,
984                   mlir::Value dst, mlir::Value src, mlir::Value shapeOp,
985                   mlir::Value sliceOp, ArrayLoadOp arrLoad) {
986   auto insPt = rewriter.saveInsertionPoint();
987   llvm::SmallVector<mlir::Value> indices;
988   llvm::SmallVector<mlir::Value> extents;
989   bool copyUsingSlice =
990       getAdjustedExtents(loc, rewriter, arrLoad, extents, shapeOp);
991   auto idxTy = rewriter.getIndexType();
992   // Build loop nest from column to row.
993   for (auto sh : llvm::reverse(extents)) {
994     auto ubi = rewriter.create<ConvertOp>(loc, idxTy, sh);
995     auto zero = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 0);
996     auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
      // The loop iterates over [0, extent-1]; indices are re-origined below.
997     auto ub = rewriter.create<mlir::arith::SubIOp>(loc, idxTy, ubi, one);
998     auto loop = rewriter.create<DoLoopOp>(loc, zero, ub, one);
999     rewriter.setInsertionPointToStart(loop.getBody());
1000     indices.push_back(loop.getInductionVar());
1002   // Reverse the indices so they are in column-major order.
1003   std::reverse(indices.begin(), indices.end());
1004   auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
1005   fir::KindMapping kindMap = getKindMapping(module);
1006   FirOpBuilder builder(rewriter, kindMap);
1007   auto fromAddr = rewriter.create<ArrayCoorOp>(
1008       loc, getEleTy(src.getType()), src, shapeOp,
1009       CopyIn && copyUsingSlice ? sliceOp : mlir::Value{},
1010       factory::originateIndices(loc, rewriter, src.getType(), shapeOp, indices),
1011       getTypeParamsIfRawData(loc, builder, arrLoad, src.getType()));
1012   auto toAddr = rewriter.create<ArrayCoorOp>(
1013       loc, getEleTy(dst.getType()), dst, shapeOp,
1014       !CopyIn && copyUsingSlice ? sliceOp : mlir::Value{},
1015       factory::originateIndices(loc, rewriter, dst.getType(), shapeOp, indices),
1016       getTypeParamsIfRawData(loc, builder, arrLoad, dst.getType()));
1017   auto eleTy = unwrapSequenceType(unwrapPassByRefType(dst.getType()));
1018   // Copy from (to) object to (from) temp copy of same object.
1019   if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
1020     auto len = getCharacterLen(loc, builder, arrLoad, charTy);
1021     CharBoxValue toChar(toAddr, len);
1022     CharBoxValue fromChar(fromAddr, len);
1023     factory::genScalarAssignment(builder, loc, toChar, fromChar);
1024   } else {
1025     if (hasDynamicSize(eleTy))
1026       TODO(loc, "copy element of dynamic size");
1027     factory::genScalarAssignment(builder, loc, toAddr, fromAddr);
  // Restore the insertion point that was active before the loop nest was
  // generated.
1029   rewriter.restoreInsertionPoint(insPt);
1032 /// The array load may be either a boxed or unboxed value. If the value is
1033 /// boxed, we read the type parameters from the boxed value.
1034 static llvm::SmallVector<mlir::Value>
1035 genArrayLoadTypeParameters(mlir::Location loc, mlir::PatternRewriter &rewriter,
1036 ArrayLoadOp load) {
1037 if (load.getTypeparams().empty()) {
1038 auto eleTy =
1039 unwrapSequenceType(unwrapPassByRefType(load.getMemref().getType()));
1040 if (hasDynamicSize(eleTy)) {
1041 if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
1042 assert(load.getMemref().getType().isa<BoxType>());
1043 auto module = load->getParentOfType<mlir::ModuleOp>();
1044 fir::KindMapping kindMap = getKindMapping(module);
1045 FirOpBuilder builder(rewriter, kindMap);
1046 return {getCharacterLen(loc, builder, load, charTy)};
1048 TODO(loc, "unhandled dynamic type parameters");
1050 return {};
1052 return load.getTypeparams();
1055 static llvm::SmallVector<mlir::Value>
1056 findNonconstantExtents(mlir::Type memrefTy,
1057 llvm::ArrayRef<mlir::Value> extents) {
1058 llvm::SmallVector<mlir::Value> nce;
1059 auto arrTy = unwrapPassByRefType(memrefTy);
1060 auto seqTy = arrTy.cast<SequenceType>();
1061 for (auto [s, x] : llvm::zip(seqTy.getShape(), extents))
1062 if (s == SequenceType::getUnknownExtent())
1063 nce.emplace_back(x);
1064 if (extents.size() > seqTy.getShape().size())
1065 for (auto x : extents.drop_front(seqTy.getShape().size()))
1066 nce.emplace_back(x);
1067 return nce;
1070 /// Allocate temporary storage for an ArrayLoadOp \p load and initialize any
1071 /// allocatable direct components of the array elements with an unallocated
1072 /// status. Returns the temporary address as well as a callback to generate the
1073 /// temporary clean-up once it has been used. The clean-up will take care of
1074 /// deallocating all the element allocatable components that may have been
1075 /// allocated while using the temporary.
1076 static std::pair<mlir::Value,
1077                  std::function<void(mlir::PatternRewriter &rewriter)>>
1078 allocateArrayTemp(mlir::Location loc, mlir::PatternRewriter &rewriter,
1079                   ArrayLoadOp load, llvm::ArrayRef<mlir::Value> extents,
1080                   mlir::Value shape) {
1081   mlir::Type baseType = load.getMemref().getType();
1082   llvm::SmallVector<mlir::Value> nonconstantExtents =
1083       findNonconstantExtents(baseType, extents);
1084   llvm::SmallVector<mlir::Value> typeParams =
1085       genArrayLoadTypeParameters(loc, rewriter, load);
  // Heap-allocate the temporary (freed by the clean-up callbacks below).
1086   mlir::Value allocmem = rewriter.create<AllocMemOp>(
1087       loc, dyn_cast_ptrOrBoxEleTy(baseType), typeParams, nonconstantExtents);
1088   mlir::Type eleType =
1089       fir::unwrapSequenceType(fir::unwrapPassByRefType(baseType));
1090   if (fir::isRecordWithAllocatableMember(eleType)) {
1091     // The allocatable component descriptors need to be set to a clean
1092     // deallocated status before anything is done with them.
1093     mlir::Value box = rewriter.create<fir::EmboxOp>(
1094         loc, fir::BoxType::get(allocmem.getType()), allocmem, shape,
1095         /*slice=*/mlir::Value{}, typeParams);
1096     auto module = load->getParentOfType<mlir::ModuleOp>();
1097     fir::KindMapping kindMap = getKindMapping(module);
1098     FirOpBuilder builder(rewriter, kindMap);
1099     runtime::genDerivedTypeInitialize(builder, loc, box);
1100     // Any allocatable component that may have been allocated must be
1101     // deallocated during the clean-up.
1102     auto cleanup = [=](mlir::PatternRewriter &r) {
1103       fir::KindMapping kindMap = getKindMapping(module);
1104       FirOpBuilder builder(r, kindMap);
1105       runtime::genDerivedTypeDestroy(builder, loc, box);
1106       r.create<FreeMemOp>(loc, allocmem);
1108     return {allocmem, cleanup};
  // No allocatable components: the clean-up only frees the temporary.
1110   auto cleanup = [=](mlir::PatternRewriter &r) {
1111     r.create<FreeMemOp>(loc, allocmem);
1113   return {allocmem, cleanup};
1116 namespace {
1117 /// Conversion of fir.array_update and fir.array_modify Ops.
1118 /// If there is a conflict for the update, then we need to perform a
1119 /// copy-in/copy-out to preserve the original values of the array. If there is
1120 /// no conflict, then it is safe to eschew making any copies.
1121 template <typename ArrayOp>
1122 class ArrayUpdateConversionBase : public mlir::OpRewritePattern<ArrayOp> {
1123 public:
1124   // TODO: Implement copy/swap semantics?
1125   explicit ArrayUpdateConversionBase(mlir::MLIRContext *ctx,
1126                                      const ArrayCopyAnalysisBase &a,
1127                                      const OperationUseMapT &m)
1128       : mlir::OpRewritePattern<ArrayOp>{ctx}, analysis{a}, useMap{m} {}
1130   /// The array_access, \p access, is to be to a cloned copy due to a potential
1131   /// conflict. Uses copy-in/copy-out semantics and not copy/swap.
1132   mlir::Value referenceToClone(mlir::Location loc,
1133                                mlir::PatternRewriter &rewriter,
1134                                ArrayOp access) const {
1135     LLVM_DEBUG(llvm::dbgs()
1136                << "generating copy-in/copy-out loops for " << access << '\n');
1137     auto *op = access.getOperation();
1138     auto *loadOp = useMap.lookup(op);
1139     auto load = mlir::cast<ArrayLoadOp>(loadOp);
1140     auto eleTy = access.getType();
1141     rewriter.setInsertionPoint(loadOp);
1142     // Copy in.
1143     llvm::SmallVector<mlir::Value> extents;
1144     bool copyUsingSlice = false;
1145     auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
1146                                               copyUsingSlice);
1147     auto [allocmem, genTempCleanUp] =
1148         allocateArrayTemp(loc, rewriter, load, extents, shapeOp);
1149     genArrayCopy</*copyIn=*/true>(load.getLoc(), rewriter, allocmem,
1150                                   load.getMemref(), shapeOp, load.getSlice(),
1151                                   load);
1152     // Generate the reference for the access.
1153     rewriter.setInsertionPoint(op);
1154     auto coor = genCoorOp(
1155         rewriter, loc, getEleTy(load.getType()), eleTy, allocmem, shapeOp,
1156         copyUsingSlice ? mlir::Value{} : load.getSlice(), access.getIndices(),
1157         load, access->hasAttr(factory::attrFortranArrayOffsets()));
1158     // Copy out.
    // The use map maps the array_load to its terminal array_merge_store.
1159     auto *storeOp = useMap.lookup(loadOp);
1160     auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
1161     rewriter.setInsertionPoint(storeOp);
1162     // Copy out.
1163     genArrayCopy</*copyIn=*/false>(store.getLoc(), rewriter, store.getMemref(),
1164                                    allocmem, shapeOp, store.getSlice(), load);
1165     genTempCleanUp(rewriter);
1166     return coor;
1169   /// Copy the RHS element into the LHS and insert copy-in/copy-out between a
1170   /// temp and the LHS if the analysis found potential overlaps between the RHS
1171   /// and LHS arrays. The element copy generator must be provided in \p
1172   /// assignElement. \p update must be the ArrayUpdateOp or the ArrayModifyOp.
1173   /// Returns the address of the LHS element inside the loop and the LHS
1174   /// ArrayLoad result.
1175   std::pair<mlir::Value, mlir::Value>
1176   materializeAssignment(mlir::Location loc, mlir::PatternRewriter &rewriter,
1177                         ArrayOp update,
1178                         const std::function<void(mlir::Value)> &assignElement,
1179                         mlir::Type lhsEltRefType) const {
1180     auto *op = update.getOperation();
1181     auto *loadOp = useMap.lookup(op);
1182     auto load = mlir::cast<ArrayLoadOp>(loadOp);
1183     LLVM_DEBUG(llvm::outs() << "does " << load << " have a conflict?\n");
1184     if (analysis.hasPotentialConflict(loadOp)) {
1185       // If there is a conflict between the arrays, then we copy the lhs array
1186       // to a temporary, update the temporary, and copy the temporary back to
1187       // the lhs array. This yields Fortran's copy-in copy-out array semantics.
1188       LLVM_DEBUG(llvm::outs() << "Yes, conflict was found\n");
1189       rewriter.setInsertionPoint(loadOp);
1190       // Copy in.
1191       llvm::SmallVector<mlir::Value> extents;
1192       bool copyUsingSlice = false;
1193       auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
1194                                                 copyUsingSlice);
1195       auto [allocmem, genTempCleanUp] =
1196           allocateArrayTemp(loc, rewriter, load, extents, shapeOp);
1198       genArrayCopy</*copyIn=*/true>(load.getLoc(), rewriter, allocmem,
1199                                     load.getMemref(), shapeOp, load.getSlice(),
1200                                     load);
1201       rewriter.setInsertionPoint(op);
1202       auto coor = genCoorOp(
1203           rewriter, loc, getEleTy(load.getType()), lhsEltRefType, allocmem,
1204           shapeOp, copyUsingSlice ? mlir::Value{} : load.getSlice(),
1205           update.getIndices(), load,
1206           update->hasAttr(factory::attrFortranArrayOffsets()));
1207       assignElement(coor);
1208       auto *storeOp = useMap.lookup(loadOp);
1209       auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
1210       rewriter.setInsertionPoint(storeOp);
1211       // Copy out.
1212       genArrayCopy</*copyIn=*/false>(store.getLoc(), rewriter,
1213                                      store.getMemref(), allocmem, shapeOp,
1214                                      store.getSlice(), load);
1215       genTempCleanUp(rewriter);
1216       return {coor, load.getResult()};
1218     // Otherwise, when there is no conflict (a possible loop-carried
1219     // dependence), the lhs array can be updated in place.
1220     LLVM_DEBUG(llvm::outs() << "No, conflict wasn't found\n");
1221     rewriter.setInsertionPoint(op);
1222     auto coorTy = getEleTy(load.getType());
1223     auto coor =
1224         genCoorOp(rewriter, loc, coorTy, lhsEltRefType, load.getMemref(),
1225                   load.getShape(), load.getSlice(), update.getIndices(), load,
1226                   update->hasAttr(factory::attrFortranArrayOffsets()));
1227     assignElement(coor);
1228     return {coor, load.getResult()};
1231 protected:
1232   const ArrayCopyAnalysisBase &analysis;
1233   const OperationUseMapT &useMap;
1236 class ArrayUpdateConversion : public ArrayUpdateConversionBase<ArrayUpdateOp> {
1237 public:
1238 explicit ArrayUpdateConversion(mlir::MLIRContext *ctx,
1239 const ArrayCopyAnalysisBase &a,
1240 const OperationUseMapT &m)
1241 : ArrayUpdateConversionBase{ctx, a, m} {}
1243 mlir::LogicalResult
1244 matchAndRewrite(ArrayUpdateOp update,
1245 mlir::PatternRewriter &rewriter) const override {
1246 auto loc = update.getLoc();
1247 auto assignElement = [&](mlir::Value coor) {
1248 auto input = update.getMerge();
1249 if (auto inEleTy = dyn_cast_ptrEleTy(input.getType())) {
1250 emitFatalError(loc, "array_update on references not supported");
1251 } else {
1252 rewriter.create<fir::StoreOp>(loc, input, coor);
1255 auto lhsEltRefType = toRefType(update.getMerge().getType());
1256 auto [_, lhsLoadResult] = materializeAssignment(
1257 loc, rewriter, update, assignElement, lhsEltRefType);
1258 update.replaceAllUsesWith(lhsLoadResult);
1259 rewriter.replaceOp(update, lhsLoadResult);
1260 return mlir::success();
1264 class ArrayModifyConversion : public ArrayUpdateConversionBase<ArrayModifyOp> {
1265 public:
1266 explicit ArrayModifyConversion(mlir::MLIRContext *ctx,
1267 const ArrayCopyAnalysisBase &a,
1268 const OperationUseMapT &m)
1269 : ArrayUpdateConversionBase{ctx, a, m} {}
1271 mlir::LogicalResult
1272 matchAndRewrite(ArrayModifyOp modify,
1273 mlir::PatternRewriter &rewriter) const override {
1274 auto loc = modify.getLoc();
1275 auto assignElement = [](mlir::Value) {
1276 // Assignment already materialized by lowering using lhs element address.
1278 auto lhsEltRefType = modify.getResult(0).getType();
1279 auto [lhsEltCoor, lhsLoadResult] = materializeAssignment(
1280 loc, rewriter, modify, assignElement, lhsEltRefType);
1281 modify.replaceAllUsesWith(mlir::ValueRange{lhsEltCoor, lhsLoadResult});
1282 rewriter.replaceOp(modify, mlir::ValueRange{lhsEltCoor, lhsLoadResult});
1283 return mlir::success();
1287 class ArrayFetchConversion : public mlir::OpRewritePattern<ArrayFetchOp> {
1288 public:
1289 explicit ArrayFetchConversion(mlir::MLIRContext *ctx,
1290 const OperationUseMapT &m)
1291 : OpRewritePattern{ctx}, useMap{m} {}
1293 mlir::LogicalResult
1294 matchAndRewrite(ArrayFetchOp fetch,
1295 mlir::PatternRewriter &rewriter) const override {
1296 auto *op = fetch.getOperation();
1297 rewriter.setInsertionPoint(op);
1298 auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
1299 auto loc = fetch.getLoc();
1300 auto coor = genCoorOp(
1301 rewriter, loc, getEleTy(load.getType()), toRefType(fetch.getType()),
1302 load.getMemref(), load.getShape(), load.getSlice(), fetch.getIndices(),
1303 load, fetch->hasAttr(factory::attrFortranArrayOffsets()));
1304 if (isa_ref_type(fetch.getType()))
1305 rewriter.replaceOp(fetch, coor);
1306 else
1307 rewriter.replaceOpWithNewOp<fir::LoadOp>(fetch, coor);
1308 return mlir::success();
1311 private:
1312 const OperationUseMapT &useMap;
1315 /// As array_access op is like an array_fetch op, except that it does not imply
1316 /// a load op. (It operates in the reference domain.)
1317 class ArrayAccessConversion : public ArrayUpdateConversionBase<ArrayAccessOp> {
1318 public:
1319 explicit ArrayAccessConversion(mlir::MLIRContext *ctx,
1320 const ArrayCopyAnalysisBase &a,
1321 const OperationUseMapT &m)
1322 : ArrayUpdateConversionBase{ctx, a, m} {}
1324 mlir::LogicalResult
1325 matchAndRewrite(ArrayAccessOp access,
1326 mlir::PatternRewriter &rewriter) const override {
1327 auto *op = access.getOperation();
1328 auto loc = access.getLoc();
1329 if (analysis.inAmendAccessSet(op)) {
1330 // This array_access is associated with an array_amend and there is a
1331 // conflict. Make a copy to store into.
1332 auto result = referenceToClone(loc, rewriter, access);
1333 access.replaceAllUsesWith(result);
1334 rewriter.replaceOp(access, result);
1335 return mlir::success();
1337 rewriter.setInsertionPoint(op);
1338 auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
1339 auto coor = genCoorOp(
1340 rewriter, loc, getEleTy(load.getType()), toRefType(access.getType()),
1341 load.getMemref(), load.getShape(), load.getSlice(), access.getIndices(),
1342 load, access->hasAttr(factory::attrFortranArrayOffsets()));
1343 rewriter.replaceOp(access, coor);
1344 return mlir::success();
1348 /// An array_amend op is a marker to record which array access is being used to
1349 /// update an array value. After this pass runs, an array_amend has no
1350 /// semantics. We rewrite these to undefined values here to remove them while
1351 /// preserving SSA form.
1352 class ArrayAmendConversion : public mlir::OpRewritePattern<ArrayAmendOp> {
1353 public:
1354 explicit ArrayAmendConversion(mlir::MLIRContext *ctx)
1355 : OpRewritePattern{ctx} {}
1357 mlir::LogicalResult
1358 matchAndRewrite(ArrayAmendOp amend,
1359 mlir::PatternRewriter &rewriter) const override {
1360 auto *op = amend.getOperation();
1361 rewriter.setInsertionPoint(op);
1362 auto loc = amend.getLoc();
1363 auto undef = rewriter.create<UndefOp>(loc, amend.getType());
1364 rewriter.replaceOp(amend, undef.getResult());
1365 return mlir::success();
/// Driver for the array-value-copy pass. Runs the conflict analysis, then
/// rewrites the array_* ops in two conversion phases.
1369 class ArrayValueCopyConverter
1370     : public fir::impl::ArrayValueCopyBase<ArrayValueCopyConverter> {
1371 public:
1372   ArrayValueCopyConverter() = default;
1373   ArrayValueCopyConverter(const fir::ArrayValueCopyOptions &options)
1374       : Base(options) {}
1376   void runOnOperation() override {
1377     auto func = getOperation();
1378     LLVM_DEBUG(llvm::dbgs() << "\n\narray-value-copy pass on function '"
1379                             << func.getName() << "'\n");
1380     auto *context = &getContext();
1382     // Perform the conflict analysis.
1383     const ArrayCopyAnalysisBase *analysis;
1384     if (optimizeConflicts)
1385       analysis = &getAnalysis<ArrayCopyAnalysisOptimized>();
1386     else
1387       analysis = &getAnalysis<ArrayCopyAnalysis>();
1389     const auto &useMap = analysis->getUseMap();
    // Phase 1: rewrite the access/fetch/update/modify/amend ops.
1391     mlir::RewritePatternSet patterns1(context);
1392     patterns1.insert<ArrayFetchConversion>(context, useMap);
1393     patterns1.insert<ArrayUpdateConversion>(context, *analysis, useMap);
1394     patterns1.insert<ArrayModifyConversion>(context, *analysis, useMap);
1395     patterns1.insert<ArrayAccessConversion>(context, *analysis, useMap);
1396     patterns1.insert<ArrayAmendConversion>(context);
1397     mlir::ConversionTarget target(*context);
1398     target
1399         .addLegalDialect<FIROpsDialect, mlir::scf::SCFDialect,
1400                          mlir::arith::ArithDialect, mlir::func::FuncDialect>();
1401     target.addIllegalOp<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp,
1402                         ArrayUpdateOp, ArrayModifyOp>();
1403     // Rewrite the array fetch and array update ops.
1404     if (mlir::failed(
1405             mlir::applyPartialConversion(func, target, std::move(patterns1)))) {
1406       mlir::emitError(mlir::UnknownLoc::get(context),
1407                       "failure in array-value-copy pass, phase 1");
1408       signalPassFailure();
    // Phase 2: remove the array_load and array_merge_store ops.
1411     mlir::RewritePatternSet patterns2(context);
1412     patterns2.insert<ArrayLoadConversion>(context);
1413     patterns2.insert<ArrayMergeStoreConversion>(context);
1414     target.addIllegalOp<ArrayLoadOp, ArrayMergeStoreOp>();
1415     if (mlir::failed(
1416             mlir::applyPartialConversion(func, target, std::move(patterns2)))) {
1417       mlir::emitError(mlir::UnknownLoc::get(context),
1418                       "failure in array-value-copy pass, phase 2");
1419       signalPassFailure();
1423 } // namespace
1425 std::unique_ptr<mlir::Pass>
1426 fir::createArrayValueCopyPass(fir::ArrayValueCopyOptions options) {
1427 return std::make_unique<ArrayValueCopyConverter>(options);