1 //===-- Generate random but valid function descriptors -------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "automemcpy/RandomFunctionGenerator.h"
11 #include <llvm/ADT/StringRef.h>
12 #include <llvm/Support/raw_ostream.h>
18 namespace automemcpy
{
20 // Exploration parameters
21 // ----------------------
22 // Here we define a set of values that will contraint the exploration and
23 // limit combinatorial explosion.
25 // We limit the number of cases for individual sizes to sizes up to 4.
26 // More individual sizes don't bring much over the overlapping strategy.
27 static constexpr int kMaxIndividualSize
= 4;
29 // We limit Overlapping Strategy to sizes up to 256.
30 // An overlap of 256B means accessing 128B at once which is usually not
31 // feasible by current CPUs. We rely on the compiler to generate multiple
32 // loads/stores if needed but higher sizes are unlikely to benefit from hardware
34 static constexpr int kMaxOverlapSize
= 256;
36 // For the loop strategies, we make sure that they iterate at least a certain
37 // number of times to amortize the cost of looping.
38 static constexpr int kLoopMinIter
= 3;
39 static constexpr int kAlignedLoopMinIter
= 2;
41 // We restrict the size of the block of data to handle in a loop.
42 // Generally speaking block size <= 16 perform poorly.
43 static constexpr int kLoopBlockSize
[] = {16, 32, 64};
45 // We restrict alignment to the following values.
46 static constexpr int kLoopAlignments
[] = {16, 32, 64};
48 // We make sure that the region bounds are one of the following values.
49 static constexpr int kAnchors
[] = {0, 1, 2, 4, 8, 16, 32, 48,
50 64, 96, 128, 256, 512, 1024, kMaxSize
};
52 // We also allow disabling loops, aligned loops and accelerators.
53 static constexpr bool kDisableLoop
= false;
54 static constexpr bool kDisableAlignedLoop
= false;
55 static constexpr bool kDisableAccelerator
= false;
57 // For memcpy, we can also explore whether aligning on source or destination has
59 static constexpr bool kExploreAlignmentArg
= true;
61 // The function we generate code for.
62 // BCMP is specifically disabled for now.
63 static constexpr int kFunctionTypes
[] = {
64 (int)FunctionType::MEMCPY
,
65 (int)FunctionType::MEMCMP
,
66 // (int)FunctionType::BCMP,
67 (int)FunctionType::MEMSET
,
68 (int)FunctionType::BZERO
,
71 // The actual implementation of each function can be handled via primitive types
72 // (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN).
73 // We want to move toward delegating the code generation entirely to the
74 // compiler but for now we have to make use of -per microarchitecture- custom
75 // implementations. Scalar being more portable but also less performant, we
77 static constexpr int kElementClasses
[] = {
78 // (int)ElementTypeClass::SCALAR,
79 (int)ElementTypeClass::NATIVE
,
80 // (int)ElementTypeClass::BUILTIN
83 RandomFunctionGenerator::RandomFunctionGenerator()
84 : Solver(Context
), Type(Context
.int_const("Type")),
85 ContiguousBegin(Context
.int_const("ContiguousBegin")),
86 ContiguousEnd(Context
.int_const("ContiguousEnd")),
87 OverlapBegin(Context
.int_const("OverlapBegin")),
88 OverlapEnd(Context
.int_const("OverlapEnd")),
89 LoopBegin(Context
.int_const("LoopBegin")),
90 LoopEnd(Context
.int_const("LoopEnd")),
91 LoopBlockSize(Context
.int_const("LoopBlockSize")),
92 AlignedLoopBegin(Context
.int_const("AlignedLoopBegin")),
93 AlignedLoopEnd(Context
.int_const("AlignedLoopEnd")),
94 AlignedLoopBlockSize(Context
.int_const("AlignedLoopBlockSize")),
95 AlignedAlignment(Context
.int_const("AlignedAlignment")),
96 AlignedArg(Context
.int_const("AlignedArg")),
97 AcceleratorBegin(Context
.int_const("AcceleratorBegin")),
98 AcceleratorEnd(Context
.int_const("AcceleratorEnd")),
99 ElementClass(Context
.int_const("ElementClass")) {
100 // All possible functions.
101 Solver
.add(inSetConstraint(Type
, kFunctionTypes
));
103 // Add constraints for region bounds.
104 addBoundsAndAnchors(ContiguousBegin
, ContiguousEnd
);
105 addBoundsAndAnchors(OverlapBegin
, OverlapEnd
);
106 addBoundsAndAnchors(LoopBegin
, LoopEnd
);
107 addBoundsAndAnchors(AlignedLoopBegin
, AlignedLoopEnd
);
108 addBoundsAndAnchors(AcceleratorBegin
, AcceleratorEnd
);
109 // We always consider strategies in this order, and we
110 // always end with the `Accelerator` strategy, as it's typically more
111 // efficient for large sizes.
112 // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
113 Solver
.add(ContiguousEnd
== OverlapBegin
);
114 Solver
.add(OverlapEnd
== LoopBegin
);
115 Solver
.add(LoopEnd
== AlignedLoopBegin
);
116 Solver
.add(AlignedLoopEnd
== AcceleratorBegin
);
117 // Fix endpoints: The minimum size that we want to copy is 0, and we always
118 // start with the `Contiguous` strategy. The max size is `kMaxSize`.
119 Solver
.add(ContiguousBegin
== 0);
120 Solver
.add(AcceleratorEnd
== kMaxSize
);
122 Solver
.add(ContiguousEnd
<= kMaxIndividualSize
+ 1);
124 Solver
.add(OverlapEnd
<= kMaxOverlapSize
+ 1);
125 // Overlap only ever makes sense when accessing multiple bytes at a time.
126 // i.e. Overlap<1> is useless.
127 Solver
.add(OverlapBegin
== OverlapEnd
|| OverlapBegin
>= 2);
129 addLoopConstraints(LoopBegin
, LoopEnd
, LoopBlockSize
, kLoopMinIter
);
131 addLoopConstraints(AlignedLoopBegin
, AlignedLoopEnd
, AlignedLoopBlockSize
,
132 kAlignedLoopMinIter
);
133 Solver
.add(inSetConstraint(AlignedAlignment
, kLoopAlignments
));
134 Solver
.add(AlignedLoopBegin
== AlignedLoopEnd
|| AlignedLoopBegin
>= 64);
135 Solver
.add(AlignedLoopBlockSize
>= AlignedAlignment
);
136 Solver
.add(AlignedLoopBlockSize
>= LoopBlockSize
);
137 z3::expr IsMemcpy
= Type
== (int)FunctionType::MEMCPY
;
138 z3::expr ExploreAlignment
= IsMemcpy
&& kExploreAlignmentArg
;
141 inSetConstraint(AlignedArg
, {(int)AlignArg::_1
, (int)AlignArg::_2
})) ||
142 (!ExploreAlignment
&& AlignedArg
== (int)AlignArg::_1
));
144 Solver
.add(IsMemcpy
||
146 AcceleratorEnd
)); // Only Memcpy has accelerator for now.
148 Solver
.add(inSetConstraint(ElementClass
, kElementClasses
));
151 Solver
.add(LoopBegin
== LoopEnd
);
152 if (kDisableAlignedLoop
)
153 Solver
.add(AlignedLoopBegin
== AlignedLoopEnd
);
154 if (kDisableAccelerator
)
155 Solver
.add(AcceleratorBegin
== AcceleratorEnd
);
158 // Creates SizeSpan from Begin/End values.
159 // Returns std::nullopt if Begin==End.
160 static std::optional
<SizeSpan
> AsSizeSpan(size_t Begin
, size_t End
) {
169 // Generic method to create a `Region` struct with a Span or std::nullopt if
171 template <typename Region
>
172 static std::optional
<Region
> As(size_t Begin
, size_t End
) {
173 if (auto Span
= AsSizeSpan(Begin
, End
)) {
181 // Returns a Loop struct or std::nullopt if span is empty.
182 static std::optional
<Loop
> AsLoop(size_t Begin
, size_t End
, size_t BlockSize
) {
183 if (auto Span
= AsSizeSpan(Begin
, End
)) {
186 Output
.BlockSize
= BlockSize
;
192 // Returns an AlignedLoop struct or std::nullopt if span is empty.
193 static std::optional
<AlignedLoop
> AsAlignedLoop(size_t Begin
, size_t End
,
197 if (auto Loop
= AsLoop(Begin
, End
, BlockSize
)) {
200 Output
.Alignment
= Alignment
;
201 Output
.AlignTo
= AlignTo
;
207 std::optional
<FunctionDescriptor
> RandomFunctionGenerator::next() {
208 if (Solver
.check() != z3::sat
)
211 z3::model m
= Solver
.get_model();
213 // Helper method to get the current numerical value of a z3::expr.
214 const auto E
= [&m
](z3::expr
&V
) -> int {
215 return m
.eval(V
).get_numeral_int();
218 // Fill is the function descriptor to return.
219 FunctionDescriptor R
;
220 R
.Type
= FunctionType(E(Type
));
221 R
.Contiguous
= As
<Contiguous
>(E(ContiguousBegin
), E(ContiguousEnd
));
222 R
.Overlap
= As
<Overlap
>(E(OverlapBegin
), E(OverlapEnd
));
223 R
.Loop
= AsLoop(E(LoopBegin
), E(LoopEnd
), E(LoopBlockSize
));
224 R
.AlignedLoop
= AsAlignedLoop(E(AlignedLoopBegin
), E(AlignedLoopEnd
),
225 E(AlignedLoopBlockSize
), E(AlignedAlignment
),
226 AlignArg(E(AlignedArg
)));
227 R
.Accelerator
= As
<Accelerator
>(E(AcceleratorBegin
), E(AcceleratorEnd
));
228 R
.ElementClass
= ElementTypeClass(E(ElementClass
));
230 // Express current state as a set of constraints.
231 z3::expr CurrentLayout
=
232 (Type
== E(Type
)) && (ContiguousBegin
== E(ContiguousBegin
)) &&
233 (ContiguousEnd
== E(ContiguousEnd
)) &&
234 (OverlapBegin
== E(OverlapBegin
)) && (OverlapEnd
== E(OverlapEnd
)) &&
235 (LoopBegin
== E(LoopBegin
)) && (LoopEnd
== E(LoopEnd
)) &&
236 (LoopBlockSize
== E(LoopBlockSize
)) &&
237 (AlignedLoopBegin
== E(AlignedLoopBegin
)) &&
238 (AlignedLoopEnd
== E(AlignedLoopEnd
)) &&
239 (AlignedLoopBlockSize
== E(AlignedLoopBlockSize
)) &&
240 (AlignedAlignment
== E(AlignedAlignment
)) &&
241 (AlignedArg
== E(AlignedArg
)) &&
242 (AcceleratorBegin
== E(AcceleratorBegin
)) &&
243 (AcceleratorEnd
== E(AcceleratorEnd
)) &&
244 (ElementClass
== E(ElementClass
));
246 // Ask solver to never show this configuration ever again.
247 Solver
.add(!CurrentLayout
);
251 // Make sure `Variable` is one of the provided values.
252 z3::expr
RandomFunctionGenerator::inSetConstraint(z3::expr
&Variable
,
253 ArrayRef
<int> Values
) const {
254 z3::expr_vector
Args(Variable
.ctx());
255 for (int Value
: Values
)
256 Args
.push_back(Variable
== Value
);
257 return z3::mk_or(Args
);
260 void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr
&Begin
,
262 // Begin and End are picked amongst a set of predefined values.
263 Solver
.add(inSetConstraint(Begin
, kAnchors
));
264 Solver
.add(inSetConstraint(End
, kAnchors
));
265 Solver
.add(Begin
>= 0);
266 Solver
.add(Begin
<= End
);
267 Solver
.add(End
<= kMaxSize
);
270 void RandomFunctionGenerator::addLoopConstraints(const z3::expr
&LoopBegin
,
271 const z3::expr
&LoopEnd
,
272 z3::expr
&LoopBlockSize
,
274 Solver
.add(inSetConstraint(LoopBlockSize
, kLoopBlockSize
));
275 Solver
.add(LoopBegin
== LoopEnd
||
276 (LoopBegin
> (LoopMinIter
* LoopBlockSize
)));
279 } // namespace automemcpy