libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp

   1 //===-- Generate random but valid function descriptors  -------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "automemcpy/RandomFunctionGenerator.h"
  10
  11 #include <llvm/ADT/None.h>
  12 #include <llvm/ADT/StringRef.h>
  13 #include <llvm/Support/raw_ostream.h>
  14
  15 #include <set>
  16
  17 namespace llvm {
  18 namespace automemcpy {
  19
  20 // Exploration parameters
  21 // ----------------------
  22 // Here we define a set of values that will contraint the exploration and
  23 // limit combinatorial explosion.
  24
  25 // We limit the number of cases for individual sizes to sizes up to 4.
  26 // More individual sizes don't bring much over the overlapping strategy.
  27 static constexpr int kMaxIndividualSize = 4;
  28
  29 // We limit Overlapping Strategy to sizes up to 256.
  30 // An overlap of 256B means accessing 128B at once which is usually not
  31 // feasible by current CPUs. We rely on the compiler to generate multiple
  32 // loads/stores if needed but higher sizes are unlikely to benefit from hardware
  33 // acceleration.
  34 static constexpr int kMaxOverlapSize = 256;
  35
  36 // For the loop strategies, we make sure that they iterate at least a certain
  37 // number of times to amortize the cost of looping.
  38 static constexpr int kLoopMinIter = 3;
  39 static constexpr int kAlignedLoopMinIter = 2;
  40
  41 // We restrict the size of the block of data to handle in a loop.
  42 // Generally speaking block size <= 16 perform poorly.
  43 static constexpr int kLoopBlockSize[] = {16, 32, 64};
  44
  45 // We restrict alignment to the following values.
  46 static constexpr int kLoopAlignments[] = {16, 32, 64};
  47
  48 // We make sure that the region bounds are one of the following values.
  49 static constexpr int kAnchors[] = {0,  1,  2,   4,   8,   16,   32,      48,
  50                                    64, 96, 128, 256, 512, 1024, kMaxSize};
  51
  52 // We also allow disabling loops, aligned loops and accelerators.
  53 static constexpr bool kDisableLoop = false;
  54 static constexpr bool kDisableAlignedLoop = false;
  55 static constexpr bool kDisableAccelerator = false;
  56
  57 // For memcpy, we can also explore whether aligning on source or destination has
  58 // an effect.
  59 static constexpr bool kExploreAlignmentArg = true;
  60
  61 // The function we generate code for.
  62 // BCMP is specifically disabled for now.
  63 static constexpr int kFunctionTypes[] = {
  64     (int)FunctionType::MEMCPY,
  65     (int)FunctionType::MEMCMP,
  66     //  (int)FunctionType::BCMP,
  67     (int)FunctionType::MEMSET,
  68     (int)FunctionType::BZERO,
  69 };
  70
  71 // The actual implementation of each function can be handled via primitive types
  72 // (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN).
  73 // We want to move toward delegating the code generation entirely to the
  74 // compiler but for now we have to make use of -per microarchitecture- custom
  75 // implementations. Scalar being more portable but also less performant, we
  76 // remove it as well.
  77 static constexpr int kElementClasses[] = {
  78     // (int)ElementTypeClass::SCALAR,
  79     (int)ElementTypeClass::NATIVE,
  80     // (int)ElementTypeClass::BUILTIN
  81 };
  82
  83 RandomFunctionGenerator::RandomFunctionGenerator()
  84     : Solver(Context), Type(Context.int_const("Type")),
  85       ContiguousBegin(Context.int_const("ContiguousBegin")),
  86       ContiguousEnd(Context.int_const("ContiguousEnd")),
  87       OverlapBegin(Context.int_const("OverlapBegin")),
  88       OverlapEnd(Context.int_const("OverlapEnd")),
  89       LoopBegin(Context.int_const("LoopBegin")),
  90       LoopEnd(Context.int_const("LoopEnd")),
  91       LoopBlockSize(Context.int_const("LoopBlockSize")),
  92       AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
  93       AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
  94       AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
  95       AlignedAlignment(Context.int_const("AlignedAlignment")),
  96       AlignedArg(Context.int_const("AlignedArg")),
  97       AcceleratorBegin(Context.int_const("AcceleratorBegin")),
  98       AcceleratorEnd(Context.int_const("AcceleratorEnd")),
  99       ElementClass(Context.int_const("ElementClass")) {
 100   // All possible functions.
 101   Solver.add(inSetConstraint(Type, kFunctionTypes));
 102
 103   // Add constraints for region bounds.
 104   addBoundsAndAnchors(ContiguousBegin, ContiguousEnd);
 105   addBoundsAndAnchors(OverlapBegin, OverlapEnd);
 106   addBoundsAndAnchors(LoopBegin, LoopEnd);
 107   addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd);
 108   addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd);
 109   // We always consider strategies in this order, and we
 110   // always end with the `Accelerator` strategy, as it's typically more
 111   // efficient for large sizes.
 112   // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
 113   Solver.add(ContiguousEnd == OverlapBegin);
 114   Solver.add(OverlapEnd == LoopBegin);
 115   Solver.add(LoopEnd == AlignedLoopBegin);
 116   Solver.add(AlignedLoopEnd == AcceleratorBegin);
 117   // Fix endpoints: The minimum size that we want to copy is 0, and we always
 118   // start with the `Contiguous` strategy. The max size is `kMaxSize`.
 119   Solver.add(ContiguousBegin == 0);
 120   Solver.add(AcceleratorEnd == kMaxSize);
 121   // Contiguous
 122   Solver.add(ContiguousEnd <= kMaxIndividualSize + 1);
 123   // Overlap
 124   Solver.add(OverlapEnd <= kMaxOverlapSize + 1);
 125   // Overlap only ever makes sense when accessing multiple bytes at a time.
 126   // i.e. Overlap<1> is useless.
 127   Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2);
 128   // Loop
 129   addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter);
 130   // Aligned Loop
 131   addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
 132                      kAlignedLoopMinIter);
 133   Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments));
 134   Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64);
 135   Solver.add(AlignedLoopBlockSize >= AlignedAlignment);
 136   Solver.add(AlignedLoopBlockSize >= LoopBlockSize);
 137   z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY;
 138   z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg;
 139   Solver.add(
 140       (ExploreAlignment &&
 141        inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) ||
 142       (!ExploreAlignment && AlignedArg == (int)AlignArg::_1));
 143   // Accelerator
 144   Solver.add(IsMemcpy ||
 145              (AcceleratorBegin ==
 146               AcceleratorEnd)); // Only Memcpy has accelerator for now.
 147   // Element classes
 148   Solver.add(inSetConstraint(ElementClass, kElementClasses));
 149
 150   if (kDisableLoop)
 151     Solver.add(LoopBegin == LoopEnd);
 152   if (kDisableAlignedLoop)
 153     Solver.add(AlignedLoopBegin == AlignedLoopEnd);
 154   if (kDisableAccelerator)
 155     Solver.add(AcceleratorBegin == AcceleratorEnd);
 156 }
 157
 158 // Creates SizeSpan from Begin/End values.
 159 // Returns llvm::None if Begin==End.
 160 static Optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) {
 161   if (Begin == End)
 162     return None;
 163   SizeSpan SS;
 164   SS.Begin = Begin;
 165   SS.End = End;
 166   return SS;
 167 }
 168
 169 // Generic method to create a `Region` struct with a Span or None if span is
 170 // empty.
 171 template <typename Region>
 172 static Optional<Region> As(size_t Begin, size_t End) {
 173   if (auto Span = AsSizeSpan(Begin, End)) {
 174     Region Output;
 175     Output.Span = *Span;
 176     return Output;
 177   }
 178   return None;
 179 }
 180
 181 // Returns a Loop struct or None if span is empty.
 182 static Optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) {
 183   if (auto Span = AsSizeSpan(Begin, End)) {
 184     Loop Output;
 185     Output.Span = *Span;
 186     Output.BlockSize = BlockSize;
 187     return Output;
 188   }
 189   return None;
 190 }
 191
 192 // Returns an AlignedLoop struct or None if span is empty.
 193 static Optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End,
 194                                            size_t BlockSize, size_t Alignment,
 195                                            AlignArg AlignTo) {
 196   if (auto Loop = AsLoop(Begin, End, BlockSize)) {
 197     AlignedLoop Output;
 198     Output.Loop = *Loop;
 199     Output.Alignment = Alignment;
 200     Output.AlignTo = AlignTo;
 201     return Output;
 202   }
 203   return None;
 204 }
 205
 206 Optional<FunctionDescriptor> RandomFunctionGenerator::next() {
 207   if (Solver.check() != z3::sat)
 208     return {};
 209
 210   z3::model m = Solver.get_model();
 211
 212   // Helper method to get the current numerical value of a z3::expr.
 213   const auto E = [&m](z3::expr &V) -> int {
 214     return m.eval(V).get_numeral_int();
 215   };
 216
 217   // Fill is the function descriptor to return.
 218   FunctionDescriptor R;
 219   R.Type = FunctionType(E(Type));
 220   R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd));
 221   R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd));
 222   R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize));
 223   R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd),
 224                                 E(AlignedLoopBlockSize), E(AlignedAlignment),
 225                                 AlignArg(E(AlignedArg)));
 226   R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd));
 227   R.ElementClass = ElementTypeClass(E(ElementClass));
 228
 229   // Express current state as a set of constraints.
 230   z3::expr CurrentLayout =
 231       (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) &&
 232       (ContiguousEnd == E(ContiguousEnd)) &&
 233       (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) &&
 234       (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) &&
 235       (LoopBlockSize == E(LoopBlockSize)) &&
 236       (AlignedLoopBegin == E(AlignedLoopBegin)) &&
 237       (AlignedLoopEnd == E(AlignedLoopEnd)) &&
 238       (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) &&
 239       (AlignedAlignment == E(AlignedAlignment)) &&
 240       (AlignedArg == E(AlignedArg)) &&
 241       (AcceleratorBegin == E(AcceleratorBegin)) &&
 242       (AcceleratorEnd == E(AcceleratorEnd)) &&
 243       (ElementClass == E(ElementClass));
 244
 245   // Ask solver to never show this configuration ever again.
 246   Solver.add(!CurrentLayout);
 247   return R;
 248 }
 249
 250 // Make sure `Variable` is one of the provided values.
 251 z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable,
 252                                                   ArrayRef<int> Values) const {
 253   z3::expr_vector Args(Variable.ctx());
 254   for (int Value : Values)
 255     Args.push_back(Variable == Value);
 256   return z3::mk_or(Args);
 257 }
 258
 259 void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin,
 260                                                   z3::expr &End) {
 261   // Begin and End are picked amongst a set of predefined values.
 262   Solver.add(inSetConstraint(Begin, kAnchors));
 263   Solver.add(inSetConstraint(End, kAnchors));
 264   Solver.add(Begin >= 0);
 265   Solver.add(Begin <= End);
 266   Solver.add(End <= kMaxSize);
 267 }
 268
 269 void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin,
 270                                                  const z3::expr &LoopEnd,
 271                                                  z3::expr &LoopBlockSize,
 272                                                  int LoopMinIter) {
 273   Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize));
 274   Solver.add(LoopBegin == LoopEnd ||
 275              (LoopBegin > (LoopMinIter * LoopBlockSize)));
 276 }
 277
 278 } // namespace automemcpy
 279 } // namespace llvm