flang/lib/Optimizer/Transforms/LoopVersioning.cpp

   1 //===- LoopVersioning.cpp -------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 //===----------------------------------------------------------------------===//
  10 /// \file
  11 /// This pass looks for loops iterating over assumed-shape arrays, that can
  12 /// be optimized by "guessing" that the stride is element-sized.
  13 ///
  14 /// This is done by creating two versions of the same loop: one which assumes
  15 /// that the elements are contiguous (stride == size of element), and one that
  16 /// is the original generic loop.
  17 ///
  18 /// As a side-effect of the assumed element size stride, the array is also
  19 /// flattened to make it a 1D array - this is because the internal array
  20 /// structure must be either 1D or have known sizes in all dimensions - and at
  21 /// least one of the dimensions here is already unknown.
  22 ///
  23 /// There are two distinct benefits here:
  24 /// 1. The loop that iterates over the elements is somewhat simplified by the
  25 ///    constant stride calculation.
  26 /// 2. Since the compiler can understand the size of the stride, it can use
  27 ///    vector instructions, where an unknown (at compile time) stride does often
  28 ///    prevent vector operations from being used.
  29 ///
  30 /// A known drawback is that the code size is increased, and in some cases the
  31 /// increase can be quite substantial - 3-4x is quite plausible (this includes
  32 /// the effect of the loop getting vectorized, which in itself often more than
  33 /// doubles the size of the code, because unless the loop size is known, there
  34 /// will be a modulo vector-size remainder to deal with).
  35 ///
  36 /// TODO: Do we need some size limit where loops no longer get duplicated?
  37 ///       Maybe some sort of cost analysis.
  38 /// TODO: Should some loop content - for example calls to functions and
  39 ///       subroutines - inhibit the versioning of the loops? Plausibly, this
  40 ///       could be part of the cost analysis above.
  41 //===----------------------------------------------------------------------===//
  42
  43 #include "flang/ISO_Fortran_binding_wrapper.h"
  44 #include "flang/Optimizer/Builder/BoxValue.h"
  45 #include "flang/Optimizer/Builder/FIRBuilder.h"
  46 #include "flang/Optimizer/Builder/Runtime/Inquiry.h"
  47 #include "flang/Optimizer/Dialect/FIRDialect.h"
  48 #include "flang/Optimizer/Dialect/FIROps.h"
  49 #include "flang/Optimizer/Dialect/FIRType.h"
  50 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
  51 #include "flang/Optimizer/Dialect/Support/KindMapping.h"
  52 #include "flang/Optimizer/Support/DataLayout.h"
  53 #include "flang/Optimizer/Transforms/Passes.h"
  54 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
  55 #include "mlir/IR/Dominance.h"
  56 #include "mlir/IR/Matchers.h"
  57 #include "mlir/IR/TypeUtilities.h"
  58 #include "mlir/Pass/Pass.h"
  59 #include "mlir/Transforms/DialectConversion.h"
  60 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
  61 #include "mlir/Transforms/RegionUtils.h"
  62 #include "llvm/Support/Debug.h"
  63 #include "llvm/Support/raw_ostream.h"
  64
  65 #include <algorithm>
  66
  67 namespace fir {
  68 #define GEN_PASS_DEF_LOOPVERSIONING
  69 #include "flang/Optimizer/Transforms/Passes.h.inc"
  70 } // namespace fir
  71
  72 #define DEBUG_TYPE "flang-loop-versioning"
  73
  74 namespace {
  75 /// The loop versioning pass; all the work is done in runOnOperation().
  76 class LoopVersioningPass
  77     : public fir::impl::LoopVersioningBase<LoopVersioningPass> {
  78 public:
  79   void runOnOperation() override;
  80 };
  81
  82 /// @struct ArgInfo
  83 /// One array argument considered for versioning. Instances are created by
  84 /// positional brace-initialization, so the member order is significant.
  85 struct ArgInfo {
  86   mlir::Value arg; ///< The argument, or the in-loop operand derived from it.
  87   size_t size;     ///< Element size the lowest-dimension stride must match.
  88   unsigned rank;   ///< Number of dimensions of the array.
  89   fir::BoxDimsOp dims[CFI_MAX_RANK]; ///< fir.box_dims ops, one per dimension.
  90 };
  91
  92 /// @struct ArgsUsageInLoop
  93 /// Summary of how the function arguments of interest are used by the
  94 /// operations immediately nested in one loop.
  95 struct ArgsUsageInLoop {
  96   /// Maps the memref operand of an array indexing operation
  97   /// (e.g. fir.coordinate_of) to the matching argument information.
  98   llvm::DenseMap<mlir::Value, ArgInfo> usageInfo;
  99   /// Memref operands of the array indexing operations inside the loop
 100   /// that cannot be transformed. The set is also used to keep the outer
 101   /// loops untouched: transforming an outer loop would imply rewriting
 102   /// the operations of this loop, which is exactly what must not
 103   /// happen.
 104   llvm::SetVector<mlir::Value> cannotTransform;
 105
 106   // Print the collected information, assuming it was gathered for the
 107   // given loop. The whole body is wrapped in LLVM_DEBUG.
 108   void dump(fir::DoLoopOp loop) const {
 109     LLVM_DEBUG({
 110       llvm::raw_ostream &os = llvm::dbgs();
 111       auto flags = mlir::OpPrintingFlags().skipRegions();
 112       os << "Arguments usage info for loop:\n";
 113       loop.print(os, flags);
 114       os << "\nUsed args:\n";
 115       for (const auto &entry : usageInfo) {
 116         mlir::Value used = entry.first;
 117         used.print(os, flags);
 118         os << "\n";
 119       }
 120       os << "\nCannot transform args:\n";
 121       for (mlir::Value blocked : cannotTransform) {
 122         blocked.print(os, flags);
 123         os << "\n";
 124       }
 125       os << "====\n";
 126     });
 127   }
 128
 129   // Forget every argument in the given set: drop it from both
 130   // usageInfo and cannotTransform.
 131   void eraseUsage(const llvm::SetVector<mlir::Value> &args) {
 132     for (mlir::Value dropped : args)
 133       usageInfo.erase(dropped);
 134     cannotTransform.set_subtract(args);
 135   }
 136
 137   // Same as above, with the arguments to forget given as the keys of
 138   // a usageInfo-style map.
 139   void eraseUsage(const llvm::DenseMap<mlir::Value, ArgInfo> &args) {
 140     for (auto it = args.begin(), end = args.end(); it != end; ++it) {
 141       usageInfo.erase(it->first);
 142       cannotTransform.remove(it->first);
 143     }
 144   }
 145 };
 146 } // namespace
 147 /// Unwrap the type of \p v and return it as a sequence type (null if not).
 148 static fir::SequenceType getAsSequenceType(mlir::Value *v) {
 149   mlir::Type bare = fir::unwrapRefType(v->getType());
 150   return mlir::dyn_cast<fir::SequenceType>(fir::unwrapPassByRefType(bare));
 151 }
 152
 153 /// Look through a fir.declare: if \p val is defined by one, return the
 154 /// memref it declares; otherwise return \p val unchanged.
 155 static mlir::Value unwrapFirDeclare(mlir::Value val) {
 156   // One step suffices: there are no declares of declares.
 157   fir::DeclareOp declareOp = val.getDefiningOp<fir::DeclareOp>();
 158   if (!declareOp)
 159     return val;
 160   return declareOp.getMemref();
 161 }
 162
 163 /// Look through a fir.rebox: if \p val is defined by one, return the box
 164 /// being reboxed; otherwise return \p val unchanged.
 165 static mlir::Value unwrapReboxOp(mlir::Value val) {
 166   // Only one level is peeled; reboxes of reboxes are not supported.
 167   if (auto reboxOp = val.getDefiningOp<fir::ReboxOp>())
 168     return reboxOp.getBox();
 169   return val;
 170 }
 171
 172 /// Strip a fir.rebox, then a fir.declare, from \p val (when present).
 173 static mlir::Value normaliseVal(mlir::Value val) {
 174   mlir::Value withoutRebox = unwrapReboxOp(val);
 175   return unwrapFirDeclare(withoutRebox);
 176 }
 177
 178 /// FIR operations may take a fir.shape, a fir.shift or a fir.shapeshift.
 179 /// Only the latter two carry lower bounds: return the one for dimension
 180 /// \p dim if \p shapeLike is defined by either, and a null value otherwise.
 181 static mlir::Value tryGetLowerBoundsFromShapeLike(mlir::Value shapeLike,
 182                                                   unsigned dim) {
 183   // A value has a single defining op, so at most one branch can match.
 184   if (auto shapeShiftOp = shapeLike.getDefiningOp<fir::ShapeShiftOp>())
 185     return shapeShiftOp.getOrigins()[dim];
 186   if (auto shiftOp = shapeLike.getDefiningOp<fir::ShiftOp>())
 187     return shiftOp.getOrigins()[dim];
 188   return {};
 189 }
 190
 191 /// Try to find the lower bound of dimension \p dim of the array accessed by
 192 /// the fir.array_coor operation \p coop.
 193 /// Requires 0 <= dim < rank.
 194 /// Returns a null value when no lower bound can be determined.
 195 static mlir::Value getLowerBound(fir::ArrayCoorOp coop, unsigned dim) {
 196   // The shape operand of fir.array_coor is the only source consulted.
 197   // Deliberately do not fall back to reading the lower bound from the box:
 198   // in the FIR lowering, boxes sometimes carry incorrect lower bound
 199   // information.
 200   mlir::Value shapeLike = coop.getShape();
 201   if (!shapeLike)
 202     return {};
 203
 204   // A null result here means the shape operand is not defined by a
 205   // fir.shift or a fir.shapeshift, i.e. it carries no lower bounds.
 206   return tryGetLowerBoundsFromShapeLike(shapeLike, dim);
 207 }
 208
 209 /// Return the zero-based index for dimension \p dim of the array coordinate
 210 /// operation \p op. \p dim should range between 0 and rank - 1.
 211 static mlir::Value getIndex(fir::FirOpBuilder &builder, mlir::Operation *op,
 212                             unsigned dim) {
 213   // fir.coordinate_of indices already start at 0, so use them directly.
 214   if (auto coordinateOp = mlir::dyn_cast<fir::CoordinateOp>(op))
 215     return coordinateOp.getCoor()[dim];
 216
 217   auto arrayCoorOp = mlir::dyn_cast<fir::ArrayCoorOp>(op);
 218   assert(arrayCoorOp &&
 219          "operation must be either fir.coordiante_of or fir.array_coor");
 220   mlir::Location opLoc = arrayCoorOp.getLoc();
 221   mlir::Value index = arrayCoorOp.getIndices()[dim];
 222   mlir::Type indexTy = index.getType();
 223
 224   // fir.array_coor indices are relative to the lower bound: subtract it,
 225   // assuming a default lower bound of one when none can be found.
 226   mlir::Value lb = getLowerBound(arrayCoorOp, dim);
 227   if (!lb)
 228     lb = builder.createIntegerConstant(opLoc, indexTy, 1);
 229   else if (lb.getType() != indexTy)
 230     lb = builder.createConvert(opLoc, indexTy, lb);
 231   return builder.create<mlir::arith::SubIOp>(opLoc, index, lb);
 232 }
 233
 234 /// Version the loops of the function that index suitable assumed-shape array
 235 /// arguments.
 236 ///
 237 /// The work is done in stages:
 238 ///  1. Collect the non-OPTIONAL arguments whose lowest dimension has an
 239 ///     unknown extent and whose element type is a float, integer or complex
 240 ///     type.
 241 ///  2. For every fir.do_loop, record which of these arguments are indexed by
 242 ///     the fir.array_coor / fir.coordinate_of operations immediately nested
 243 ///     in it, and which of those uses cannot be transformed.
 244 ///  3. Remove the non-transformable uses from the parent loops, and remove
 245 ///     from the descendant loops the uses already handled by an ancestor.
 246 ///  4. For every remaining loop, create a fir.if that checks that the stride
 247 ///     of the lowest dimension of each used array equals the element size.
 248 ///     The "then" region gets a clone of the loop in which the arrays are
 249 ///     indexed as flattened 1D arrays; the "else" region gets the original
 250 ///     loop.
 251 ///
 252 /// The pass fails if no data layout can be obtained for the module.
 253 void LoopVersioningPass::runOnOperation() {
 254   LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
 255   mlir::func::FuncOp func = getOperation();
 256
 257   // First look for arguments with assumed shape = unknown extent in the lowest
 258   // dimension.
 259   LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n");
 260   mlir::Block::BlockArgListType args = func.getArguments();
 261   mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>();
 262   fir::KindMapping kindMap = fir::getKindMapping(module);
 263   mlir::SmallVector<ArgInfo, 4> argsOfInterest;
 264   std::optional<mlir::DataLayout> dl =
 265       fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/false);
 266   if (!dl) {
 267     // The element sizes computed below need the data layout (*dl), so report
 268     // the error and fail the pass rather than dereference an empty optional.
 269     mlir::emitError(module.getLoc(),
 270                     "data layout attribute is required to perform " DEBUG_TYPE
 271                     " pass");
 272     return signalPassFailure();
 273   }
 274   for (auto &arg : args) {
 275     // Optional arguments must be checked for IsPresent before
 276     // looking for the bounds. They are unsupported for the time being.
 277     if (func.getArgAttrOfType<mlir::UnitAttr>(arg.getArgNumber(),
 278                                               fir::getOptionalAttrName())) {
 279       LLVM_DEBUG(llvm::dbgs() << "OPTIONAL is not supported\n");
 280       continue;
 281     }
 282
 283     if (auto seqTy = getAsSequenceType(&arg)) {
 284       unsigned rank = seqTy.getDimension();
 285       if (rank > 0 &&
 286           seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent()) {
 287         size_t typeSize = 0;
 288         mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(arg.getType());
 289         if (mlir::isa<mlir::FloatType>(elementType) ||
 290             mlir::isa<mlir::IntegerType>(elementType) ||
 291             mlir::isa<mlir::ComplexType>(elementType)) {
 292           auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignmentOrCrash(
 293               arg.getLoc(), elementType, *dl, kindMap);
 294           typeSize = llvm::alignTo(eleSize, eleAlign);
 295         }
 296         if (typeSize)
 297           argsOfInterest.push_back({arg, typeSize, rank, {}});
 298         else
 299           LLVM_DEBUG(llvm::dbgs() << "Type not supported\n");
 300       }
 301     }
 302   }
 303
 304   if (argsOfInterest.empty()) {
 305     LLVM_DEBUG(llvm::dbgs()
 306                << "No suitable arguments.\n=== End " DEBUG_TYPE " ===\n");
 307     return;
 308   }
 309
 310   // A list of all loops in the function in post-order.
 311   mlir::SmallVector<fir::DoLoopOp> originalLoops;
 312   // Information about the arguments usage by the instructions
 313   // immediately nested in a loop.
 314   llvm::DenseMap<fir::DoLoopOp, ArgsUsageInLoop> argsInLoops;
 315
 316   auto &domInfo = getAnalysis<mlir::DominanceInfo>();
 317
 318   // Traverse the loops in post-order and see
 319   // if those arguments are used inside any loop.
 320   func.walk([&](fir::DoLoopOp loop) {
 321     mlir::Block &body = *loop.getBody();
 322     auto &argsInLoop = argsInLoops[loop];
 323     originalLoops.push_back(loop);
 324     body.walk([&](mlir::Operation *op) {
 325       // Support either fir.array_coor or fir.coordinate_of.
 326       if (!mlir::isa<fir::ArrayCoorOp, fir::CoordinateOp>(op))
 327         return;
 328       // Process only operations immediately nested in the current loop.
 329       if (op->getParentOfType<fir::DoLoopOp>() != loop)
 330         return;
 331       mlir::Value operand = op->getOperand(0);
 332       // Iterate by value on purpose: a.arg is overwritten in the copy below.
 333       for (auto a : argsOfInterest) {
 334         if (a.arg == normaliseVal(operand)) {
 335           // Use the reboxed value, not the block arg when re-creating the loop.
 336           a.arg = operand;
 337
 338           // The operand must dominate the loop, because the contiguity check
 339           // that reads it is inserted in front of the loop. Operands that do
 340           // not are recorded in argsInLoop.cannotTransform, so that they
 341           // disable the transformation for the parent loops as well.
 342           if (!domInfo.dominates(a.arg, loop))
 343             argsInLoop.cannotTransform.insert(a.arg);
 344
 345           // No support currently for sliced arrays.
 346           // This means that we cannot transform properly
 347           // instructions referencing a.arg in the whole loop
 348           // nest this loop is located in.
 349           if (auto arrayCoor = mlir::dyn_cast<fir::ArrayCoorOp>(op))
 350             if (arrayCoor.getSlice())
 351               argsInLoop.cannotTransform.insert(a.arg);
 352
 353           if (argsInLoop.cannotTransform.contains(a.arg)) {
 354             // Remove any previously recorded usage, if any.
 355             argsInLoop.usageInfo.erase(a.arg);
 356             break;
 357           }
 358
 359           // Record the a.arg usage, if not recorded yet.
 360           argsInLoop.usageInfo.try_emplace(a.arg, a);
 361           break;
 362         }
 363       }
 364     });
 365   });
 366
 367   // Dump loops info after initial collection.
 368   LLVM_DEBUG({
 369     llvm::dbgs() << "Initial usage info:\n";
 370     for (fir::DoLoopOp loop : originalLoops) {
 371       auto &argsInLoop = argsInLoops[loop];
 372       argsInLoop.dump(loop);
 373     }
 374   });
 375
 376   // Clear argument usage for parent loops if an inner loop
 377   // contains a non-transformable usage.
 378   for (fir::DoLoopOp loop : originalLoops) {
 379     auto &argsInLoop = argsInLoops[loop];
 380     if (argsInLoop.cannotTransform.empty())
 381       continue;
 382
 383     fir::DoLoopOp parent = loop;
 384     while ((parent = parent->getParentOfType<fir::DoLoopOp>()))
 385       argsInLoops[parent].eraseUsage(argsInLoop.cannotTransform);
 386   }
 387
 388   // If an argument access can be optimized in a loop and
 389   // its descendant loop, then it does not make sense to
 390   // generate the contiguity check for the descendant loop.
 391   // The check will be produced as part of the ancestor
 392   // loop's transformation. So we can clear the argument
 393   // usage for all descendant loops.
 394   for (fir::DoLoopOp loop : originalLoops) {
 395     auto &argsInLoop = argsInLoops[loop];
 396     if (argsInLoop.usageInfo.empty())
 397       continue;
 398
 399     loop.getBody()->walk([&](fir::DoLoopOp dloop) {
 400       argsInLoops[dloop].eraseUsage(argsInLoop.usageInfo);
 401     });
 402   }
 403
 404   LLVM_DEBUG({
 405     llvm::dbgs() << "Final usage info:\n";
 406     for (fir::DoLoopOp loop : originalLoops) {
 407       auto &argsInLoop = argsInLoops[loop];
 408       argsInLoop.dump(loop);
 409     }
 410   });
 411
 412   // Reduce the collected information to a list of loops
 413   // with attached arguments usage information.
 414   // The list must hold the loops in post order, so that
 415   // the inner loops are transformed before the outer loops.
 416   struct OpsWithArgs {
 417     mlir::Operation *op;
 418     mlir::SmallVector<ArgInfo, 4> argsAndDims;
 419   };
 420   mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest;
 421   for (fir::DoLoopOp loop : originalLoops) {
 422     auto &argsInLoop = argsInLoops[loop];
 423     if (argsInLoop.usageInfo.empty())
 424       continue;
 425     OpsWithArgs info;
 426     info.op = loop;
 427     for (auto &arg : argsInLoop.usageInfo)
 428       info.argsAndDims.push_back(arg.second);
 429     loopsOfInterest.emplace_back(std::move(info));
 430   }
 431
 432   if (loopsOfInterest.empty()) {
 433     LLVM_DEBUG(llvm::dbgs()
 434                << "No loops to transform.\n=== End " DEBUG_TYPE " ===\n");
 435     return;
 436   }
 437
 438   // If we get here, there are loops to process.
 439   fir::FirOpBuilder builder{module, std::move(kindMap)};
 440   mlir::Location loc = builder.getUnknownLoc();
 441   mlir::IndexType idxTy = builder.getIndexType();
 442
 443   LLVM_DEBUG(llvm::dbgs() << "Module Before transformation:");
 444   LLVM_DEBUG(module->dump());
 445
 446   LLVM_DEBUG(llvm::dbgs() << "loopsOfInterest: " << loopsOfInterest.size()
 447                           << "\n");
 448   // Iterate by reference: each entry holds a vector of large ArgInfo objects.
 449   for (auto &op : loopsOfInterest) {
 450     LLVM_DEBUG(op.op->dump());
 451     builder.setInsertionPoint(op.op);
 452
 453     mlir::Value allCompares = nullptr;
 454     // Ensure all of the arrays are unit-stride.
 455     for (auto &arg : op.argsAndDims) {
 456       // Fetch the fir.box_dims of every dimension of the array: dimension 0
 457       // is needed for the contiguity check below, and the other dimensions
 458       // provide the strides used to flatten the index in the cloned loop.
 459       unsigned ndims = arg.rank;
 460       for (unsigned i = 0; i < ndims; i++) {
 461         mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
 462         arg.dims[i] = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy,
 463                                                      arg.arg, dimIdx);
 464       }
 465       // We only care about lowest order dimension, here.
 466       mlir::Value elemSize =
 467           builder.createIntegerConstant(loc, idxTy, arg.size);
 468       mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
 469           loc, mlir::arith::CmpIPredicate::eq, arg.dims[0].getResult(2),
 470           elemSize);
 471       if (!allCompares) {
 472         allCompares = cmp;
 473       } else {
 474         allCompares =
 475             builder.create<mlir::arith::AndIOp>(loc, cmp, allCompares);
 476       }
 477     }
 478
 479     auto ifOp =
 480         builder.create<fir::IfOp>(loc, op.op->getResultTypes(), allCompares,
 481                                   /*withElse=*/true);
 482     builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
 483
 484     LLVM_DEBUG(llvm::dbgs() << "Creating cloned loop\n");
 485     mlir::Operation *clonedLoop = op.op->clone();
 486     bool changed = false;
 487     for (auto &arg : op.argsAndDims) {
 488       fir::SequenceType::Shape newShape;
 489       newShape.push_back(fir::SequenceType::getUnknownExtent());
 490       auto elementType = fir::unwrapSeqOrBoxedSeqType(arg.arg.getType());
 491       mlir::Type arrTy = fir::SequenceType::get(newShape, elementType);
 492       mlir::Type boxArrTy = fir::BoxType::get(arrTy);
 493       mlir::Type refArrTy = builder.getRefType(arrTy);
 494       auto carg = builder.create<fir::ConvertOp>(loc, boxArrTy, arg.arg);
 495       auto caddr = builder.create<fir::BoxAddrOp>(loc, refArrTy, carg);
 496       auto insPt = builder.saveInsertionPoint();
 497       // Use caddr instead of arg.
 498       clonedLoop->walk([&](mlir::Operation *coop) {
 499         if (!mlir::isa<fir::CoordinateOp, fir::ArrayCoorOp>(coop))
 500           return;
 501         // Reduce the multi-dimensioned index to a single index.
 502         // This is required because fir arrays do not support multiple
 503         // dimensions with unknown dimensions at compile time.
 504         // We then calculate the multidimensional array like this:
 505         // arr(x, y, z) becomes arr(z * stride(2) + y * stride(1) + x)
 506         // where stride is the distance between elements in the dimensions
 507         // 0, 1 and 2 or x, y and z.
 508         if (coop->getOperand(0) == arg.arg && coop->getOperands().size() >= 2) {
 509           builder.setInsertionPoint(coop);
 510           mlir::Value totalIndex;
 511           for (unsigned i = arg.rank - 1; i > 0; i--) {
 512             mlir::Value curIndex =
 513                 builder.createConvert(loc, idxTy, getIndex(builder, coop, i));
 514             // Multiply by the stride of this array. Later we'll divide by the
 515             // element size.
 516             mlir::Value scale =
 517                 builder.createConvert(loc, idxTy, arg.dims[i].getResult(2));
 518             curIndex =
 519                 builder.create<mlir::arith::MulIOp>(loc, scale, curIndex);
 520             totalIndex = (totalIndex) ? builder.create<mlir::arith::AddIOp>(
 521                                             loc, curIndex, totalIndex)
 522                                       : curIndex;
 523           }
 524           // This is the lowest dimension - which doesn't need scaling
 525           mlir::Value finalIndex =
 526               builder.createConvert(loc, idxTy, getIndex(builder, coop, 0));
 527           if (totalIndex) {
 528             assert(llvm::isPowerOf2_32(arg.size) &&
 529                    "Expected power of two here");
 530             unsigned bits = llvm::Log2_32(arg.size);
 531             mlir::Value elemShift =
 532                 builder.createIntegerConstant(loc, idxTy, bits);
 533             totalIndex = builder.create<mlir::arith::AddIOp>(
 534                 loc,
 535                 builder.create<mlir::arith::ShRSIOp>(loc, totalIndex,
 536                                                      elemShift),
 537                 finalIndex);
 538           } else {
 539             totalIndex = finalIndex;
 540           }
 541           auto newOp = builder.create<fir::CoordinateOp>(
 542               loc, builder.getRefType(elementType), caddr,
 543               mlir::ValueRange{totalIndex});
 544           LLVM_DEBUG(newOp->dump());
 545           coop->getResult(0).replaceAllUsesWith(newOp->getResult(0));
 546           coop->erase();
 547           changed = true;
 548         }
 549       });
 550
 551       builder.restoreInsertionPoint(insPt);
 552     }
 553     assert(changed && "Expected operations to have changed");
 554
 555     builder.insert(clonedLoop);
 556     // Forward the result(s), if any, from the cloned loop to the enclosing
 557     // fir.if operation.
 558     mlir::ResultRange results = clonedLoop->getResults();
 559     bool hasResults = (results.size() > 0);
 560     if (hasResults)
 561       builder.create<fir::ResultOp>(loc, results);
 562
 563     // Add the original loop in the else-side of the if operation.
 564     builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
 565     op.op->replaceAllUsesWith(ifOp);
 566     op.op->remove();
 567     builder.insert(op.op);
 568     // Rely on "cloned loop has results, so original loop also has results".
 569     if (hasResults) {
 570       builder.create<fir::ResultOp>(loc, op.op->getResults());
 571     } else {
 572       // Use an assert to check this.
 573       assert(op.op->getResults().size() == 0 &&
 574              "Weird, the cloned loop doesn't have results, but the original "
 575              "does?");
 576     }
 577   }
 578
 579   LLVM_DEBUG(llvm::dbgs() << "After transform:\n");
 580   LLVM_DEBUG(module->dump());
 581
 582   LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
 583 }