Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / sandbox / linux / bpf_dsl / codegen_unittest.cc
blob596182212302dc5e26bc75a3aa402c544574a23f
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "sandbox/linux/bpf_dsl/codegen.h"
7 #include <map>
8 #include <utility>
9 #include <vector>
11 #include "base/macros.h"
12 #include "base/md5.h"
13 #include "base/strings/string_piece.h"
14 #include "sandbox/linux/system_headers/linux_filter.h"
15 #include "testing/gtest/include/gtest/gtest.h"
17 namespace sandbox {
18 namespace {
20 // Hash provides an abstraction for building "hash trees" from BPF
21 // control flow graphs, and efficiently identifying equivalent graphs.
23 // For simplicity, we use MD5, because base happens to provide a
24 // convenient API for its use. However, any collision-resistant hash
25 // should suffice.
26 class Hash {
27 public:
28 static const Hash kZero;
30 Hash() : digest_() {}
32 Hash(uint16_t code,
33 uint32_t k,
34 const Hash& jt = kZero,
35 const Hash& jf = kZero)
36 : digest_() {
37 base::MD5Context ctx;
38 base::MD5Init(&ctx);
39 HashValue(&ctx, code);
40 HashValue(&ctx, k);
41 HashValue(&ctx, jt);
42 HashValue(&ctx, jf);
43 base::MD5Final(&digest_, &ctx);
46 Hash(const Hash& hash) = default;
47 Hash& operator=(const Hash& rhs) = default;
49 friend bool operator==(const Hash& lhs, const Hash& rhs) {
50 return lhs.Base16() == rhs.Base16();
52 friend bool operator!=(const Hash& lhs, const Hash& rhs) {
53 return !(lhs == rhs);
56 private:
57 template <typename T>
58 void HashValue(base::MD5Context* ctx, const T& value) {
59 base::MD5Update(ctx,
60 base::StringPiece(reinterpret_cast<const char*>(&value),
61 sizeof(value)));
64 std::string Base16() const {
65 return base::MD5DigestToBase16(digest_);
68 base::MD5Digest digest_;
71 const Hash Hash::kZero;
73 // Sanity check that equality and inequality work on Hash as required.
74 TEST(CodeGen, HashSanity) {
75 std::vector<Hash> hashes;
77 // Push a bunch of logically distinct hashes.
78 hashes.push_back(Hash::kZero);
79 for (int i = 0; i < 4; ++i) {
80 hashes.push_back(Hash(i & 1, i & 2));
82 for (int i = 0; i < 16; ++i) {
83 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8)));
85 for (int i = 0; i < 64; ++i) {
86 hashes.push_back(
87 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32)));
90 for (const Hash& a : hashes) {
91 for (const Hash& b : hashes) {
92 // Hashes should equal themselves, but not equal all others.
93 if (&a == &b) {
94 EXPECT_EQ(a, b);
95 } else {
96 EXPECT_NE(a, b);
102 // ProgramTest provides a fixture for writing compiling sample
103 // programs with CodeGen and verifying the linearized output matches
104 // the input DAG.
105 class ProgramTest : public ::testing::Test {
106 protected:
107 ProgramTest() : gen_(), node_hashes_() {}
109 // MakeInstruction calls CodeGen::MakeInstruction() and associated
110 // the returned address with a hash of the instruction.
111 CodeGen::Node MakeInstruction(uint16_t code,
112 uint32_t k,
113 CodeGen::Node jt = CodeGen::kNullNode,
114 CodeGen::Node jf = CodeGen::kNullNode) {
115 CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf);
116 EXPECT_NE(CodeGen::kNullNode, res);
118 Hash digest(code, k, Lookup(jt), Lookup(jf));
119 auto it = node_hashes_.insert(std::make_pair(res, digest));
120 EXPECT_EQ(digest, it.first->second);
122 return res;
125 // RunTest compiles the program and verifies that the output matches
126 // what is expected. It should be called at the end of each program
127 // test case.
128 void RunTest(CodeGen::Node head) {
129 // Compile the program
130 CodeGen::Program program;
131 gen_.Compile(head, &program);
133 // Walk the program backwards, and compute the hash for each instruction.
134 std::vector<Hash> prog_hashes(program.size());
135 for (size_t i = program.size(); i > 0; --i) {
136 const sock_filter& insn = program.at(i - 1);
137 Hash& hash = prog_hashes.at(i - 1);
139 if (BPF_CLASS(insn.code) == BPF_JMP) {
140 if (BPF_OP(insn.code) == BPF_JA) {
141 // The compiler adds JA instructions as needed, so skip them.
142 hash = prog_hashes.at(i + insn.k);
143 } else {
144 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt),
145 prog_hashes.at(i + insn.jf));
147 } else if (BPF_CLASS(insn.code) == BPF_RET) {
148 hash = Hash(insn.code, insn.k);
149 } else {
150 hash = Hash(insn.code, insn.k, prog_hashes.at(i));
154 EXPECT_EQ(Lookup(head), prog_hashes.at(0));
157 private:
158 const Hash& Lookup(CodeGen::Node next) const {
159 if (next == CodeGen::kNullNode) {
160 return Hash::kZero;
162 auto it = node_hashes_.find(next);
163 if (it == node_hashes_.end()) {
164 ADD_FAILURE() << "No hash found for node " << next;
165 return Hash::kZero;
167 return it->second;
170 CodeGen gen_;
171 std::map<CodeGen::Node, Hash> node_hashes_;
173 DISALLOW_COPY_AND_ASSIGN(ProgramTest);
176 TEST_F(ProgramTest, OneInstruction) {
177 // Create the most basic valid BPF program:
178 // RET 0
179 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0);
180 RunTest(head);
183 TEST_F(ProgramTest, SimpleBranch) {
184 // Create a program with a single branch:
185 // JUMP if eq 42 then $0 else $1
186 // 0: RET 1
187 // 1: RET 0
188 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42,
189 MakeInstruction(BPF_RET + BPF_K, 1),
190 MakeInstruction(BPF_RET + BPF_K, 0));
191 RunTest(head);
194 TEST_F(ProgramTest, AtypicalBranch) {
195 // Create a program with a single branch:
196 // JUMP if eq 42 then $0 else $0
197 // 0: RET 0
199 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0);
200 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret);
202 // N.B.: As the instructions in both sides of the branch are already
203 // the same object, we do not actually have any "mergeable" branches.
204 // This needs to be reflected in our choice of "flags".
205 RunTest(head);
208 TEST_F(ProgramTest, Complex) {
209 // Creates a basic BPF program that we'll use to test some of the code:
210 // JUMP if eq 42 the $0 else $1 (insn6)
211 // 0: LD 23 (insn5)
212 // 1: JUMP if eq 42 then $2 else $4 (insn4)
213 // 2: JUMP to $3 (insn2)
214 // 3: LD 42 (insn1)
215 // RET 42 (insn0)
216 // 4: LD 42 (insn3)
217 // RET 42 (insn3+)
218 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42);
219 CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0);
220 CodeGen::Node insn2 = insn1; // Implicit JUMP
222 // We explicitly duplicate instructions to test that they're merged.
223 CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42,
224 MakeInstruction(BPF_RET + BPF_K, 42));
225 EXPECT_EQ(insn2, insn3);
227 CodeGen::Node insn4 =
228 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3);
229 CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4);
231 // Force a basic block that ends in neither a jump instruction nor a return
232 // instruction. It only contains "insn5". This exercises one of the less
233 // common code paths in the topo-sort algorithm.
234 // This also gives us a diamond-shaped pattern in our graph, which stresses
235 // another aspect of the topo-sort algorithm (namely, the ability to
236 // correctly count the incoming branches for subtrees that are not disjunct).
237 CodeGen::Node insn6 =
238 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4);
240 RunTest(insn6);
243 TEST_F(ProgramTest, ConfusingTails) {
244 // This simple program demonstrates https://crbug.com/351103/
245 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
246 // be tempted to merge them since they are the same. However, they are
247 // not mergeable because they fall-through to non semantically equivalent
248 // blocks.
249 // Without the fix for this bug, this program should trigger the check in
250 // CompileAndCompare: the serialized graphs from the program and its compiled
251 // version will differ.
253 // 0) LOAD 1 // ???
254 // 1) if A == 0x1; then JMP 2 else JMP 3
255 // 2) LOAD 0 // System call number
256 // 3) if A == 0x2; then JMP 4 else JMP 5
257 // 4) LOAD 0 // System call number
258 // 5) if A == 0x1; then JMP 6 else JMP 7
259 // 6) RET 0
260 // 7) RET 1
262 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
263 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
264 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
265 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
266 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
267 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
268 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
269 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
271 RunTest(i0);
274 TEST_F(ProgramTest, ConfusingTailsBasic) {
275 // Without the fix for https://crbug.com/351103/, (see
276 // SampleProgramConfusingTails()), this would generate a cyclic graph and
277 // crash as the two "LOAD 0" instructions would get merged.
279 // 0) LOAD 1 // ???
280 // 1) if A == 0x1; then JMP 2 else JMP 3
281 // 2) LOAD 0 // System call number
282 // 3) if A == 0x2; then JMP 4 else JMP 5
283 // 4) LOAD 0 // System call number
284 // 5) RET 1
286 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1);
287 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
288 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
289 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
290 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
291 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
293 RunTest(i0);
296 TEST_F(ProgramTest, ConfusingTailsMergeable) {
297 // This is similar to SampleProgramConfusingTails(), except that
298 // instructions 2 and 4 are now RET instructions.
299 // In PointerCompare(), this exercises the path where two blocks are of the
300 // same length and identical and the last instruction is a JMP or RET, so the
301 // following blocks don't need to be looked at and the blocks are mergeable.
303 // 0) LOAD 1 // ???
304 // 1) if A == 0x1; then JMP 2 else JMP 3
305 // 2) RET 42
306 // 3) if A == 0x2; then JMP 4 else JMP 5
307 // 4) RET 42
308 // 5) if A == 0x1; then JMP 6 else JMP 7
309 // 6) RET 0
310 // 7) RET 1
312 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
313 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
314 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
315 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42);
316 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
317 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42);
318 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
319 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
321 RunTest(i0);
324 TEST_F(ProgramTest, InstructionFolding) {
325 // Check that simple instructions are folded as expected.
326 CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0);
327 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
328 CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1);
329 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
330 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
331 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
333 // Check that complex sequences are folded too.
334 CodeGen::Node c =
335 MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0,
336 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b));
337 EXPECT_EQ(c, MakeInstruction(
338 BPF_LD + BPF_W + BPF_ABS, 0,
339 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)));
341 RunTest(c);
344 TEST_F(ProgramTest, FarBranches) {
345 // BPF instructions use 8-bit fields for branch offsets, which means
346 // branch targets must be within 255 instructions of the branch
347 // instruction. CodeGen abstracts away this detail by inserting jump
348 // instructions as needed, which we test here by generating programs
349 // that should trigger any interesting boundary conditions.
351 // Populate with 260 initial instruction nodes.
352 std::vector<CodeGen::Node> nodes;
353 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
354 for (size_t i = 1; i < 260; ++i) {
355 nodes.push_back(
356 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
359 // Exhaustively test branch offsets near BPF's limits.
360 for (size_t jt = 250; jt < 260; ++jt) {
361 for (size_t jf = 250; jf < 260; ++jf) {
362 nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0,
363 nodes.rbegin()[jt], nodes.rbegin()[jf]));
364 RunTest(nodes.back());
369 TEST_F(ProgramTest, JumpReuse) {
370 // As a code size optimization, we try to reuse jumps when possible
371 // instead of emitting new ones. Here we make sure that optimization
372 // is working as intended.
374 // NOTE: To simplify testing, we rely on implementation details
375 // about what CodeGen::Node values indicate (i.e., vector indices),
376 // but CodeGen users should treat them as opaque values.
378 // Populate with 260 initial instruction nodes.
379 std::vector<CodeGen::Node> nodes;
380 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
381 for (size_t i = 1; i < 260; ++i) {
382 nodes.push_back(
383 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
386 // Branching to nodes[0] and nodes[1] should require 3 new
387 // instructions: two far jumps plus the branch itself.
388 CodeGen::Node one =
389 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]);
390 EXPECT_EQ(nodes.back() + 3, one); // XXX: Implementation detail!
391 RunTest(one);
393 // Branching again to the same target nodes should require only one
394 // new instruction, as we can reuse the previous branch's jumps.
395 CodeGen::Node two =
396 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]);
397 EXPECT_EQ(one + 1, two); // XXX: Implementation detail!
398 RunTest(two);
401 } // namespace
402 } // namespace sandbox