Use multiline attribute to check for IA2_STATE_MULTILINE.
[chromium-blink-merge.git] / sandbox / linux / bpf_dsl / codegen_unittest.cc
blob3abfc85a6c827a7c69dad9ef7e8684608301c1ee
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "sandbox/linux/bpf_dsl/codegen.h"
7 #include <linux/filter.h>
9 #include <map>
10 #include <utility>
11 #include <vector>
13 #include "base/macros.h"
14 #include "base/md5.h"
15 #include "base/strings/string_piece.h"
16 #include "testing/gtest/include/gtest/gtest.h"
18 namespace sandbox {
19 namespace {
21 // Hash provides an abstraction for building "hash trees" from BPF
22 // control flow graphs, and efficiently identifying equivalent graphs.
24 // For simplicity, we use MD5, because base happens to provide a
25 // convenient API for its use. However, any collision-resistant hash
26 // should suffice.
27 class Hash {
28 public:
29 static const Hash kZero;
31 Hash() : digest_() {}
33 Hash(uint16_t code,
34 uint32_t k,
35 const Hash& jt = kZero,
36 const Hash& jf = kZero)
37 : digest_() {
38 base::MD5Context ctx;
39 base::MD5Init(&ctx);
40 HashValue(&ctx, code);
41 HashValue(&ctx, k);
42 HashValue(&ctx, jt);
43 HashValue(&ctx, jf);
44 base::MD5Final(&digest_, &ctx);
47 Hash(const Hash& hash) = default;
48 Hash& operator=(const Hash& rhs) = default;
50 friend bool operator==(const Hash& lhs, const Hash& rhs) {
51 return lhs.Base16() == rhs.Base16();
53 friend bool operator!=(const Hash& lhs, const Hash& rhs) {
54 return !(lhs == rhs);
57 private:
58 template <typename T>
59 void HashValue(base::MD5Context* ctx, const T& value) {
60 base::MD5Update(ctx,
61 base::StringPiece(reinterpret_cast<const char*>(&value),
62 sizeof(value)));
65 std::string Base16() const {
66 return MD5DigestToBase16(digest_);
69 base::MD5Digest digest_;
72 const Hash Hash::kZero;
74 // Sanity check that equality and inequality work on Hash as required.
75 TEST(CodeGen, HashSanity) {
76 std::vector<Hash> hashes;
78 // Push a bunch of logically distinct hashes.
79 hashes.push_back(Hash::kZero);
80 for (int i = 0; i < 4; ++i) {
81 hashes.push_back(Hash(i & 1, i & 2));
83 for (int i = 0; i < 16; ++i) {
84 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8)));
86 for (int i = 0; i < 64; ++i) {
87 hashes.push_back(
88 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32)));
91 for (const Hash& a : hashes) {
92 for (const Hash& b : hashes) {
93 // Hashes should equal themselves, but not equal all others.
94 if (&a == &b) {
95 EXPECT_EQ(a, b);
96 } else {
97 EXPECT_NE(a, b);
103 // ProgramTest provides a fixture for writing compiling sample
104 // programs with CodeGen and verifying the linearized output matches
105 // the input DAG.
106 class ProgramTest : public ::testing::Test {
107 protected:
108 ProgramTest() : gen_(), node_hashes_() {}
110 // MakeInstruction calls CodeGen::MakeInstruction() and associated
111 // the returned address with a hash of the instruction.
112 CodeGen::Node MakeInstruction(uint16_t code,
113 uint32_t k,
114 CodeGen::Node jt = CodeGen::kNullNode,
115 CodeGen::Node jf = CodeGen::kNullNode) {
116 CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf);
117 EXPECT_NE(CodeGen::kNullNode, res);
119 Hash digest(code, k, Lookup(jt), Lookup(jf));
120 auto it = node_hashes_.insert(std::make_pair(res, digest));
121 EXPECT_EQ(digest, it.first->second);
123 return res;
126 // RunTest compiles the program and verifies that the output matches
127 // what is expected. It should be called at the end of each program
128 // test case.
129 void RunTest(CodeGen::Node head) {
130 // Compile the program
131 CodeGen::Program program;
132 gen_.Compile(head, &program);
134 // Walk the program backwards, and compute the hash for each instruction.
135 std::vector<Hash> prog_hashes(program.size());
136 for (size_t i = program.size(); i > 0; --i) {
137 const sock_filter& insn = program.at(i - 1);
138 Hash& hash = prog_hashes.at(i - 1);
140 if (BPF_CLASS(insn.code) == BPF_JMP) {
141 if (BPF_OP(insn.code) == BPF_JA) {
142 // The compiler adds JA instructions as needed, so skip them.
143 hash = prog_hashes.at(i + insn.k);
144 } else {
145 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt),
146 prog_hashes.at(i + insn.jf));
148 } else if (BPF_CLASS(insn.code) == BPF_RET) {
149 hash = Hash(insn.code, insn.k);
150 } else {
151 hash = Hash(insn.code, insn.k, prog_hashes.at(i));
155 EXPECT_EQ(Lookup(head), prog_hashes.at(0));
158 private:
159 const Hash& Lookup(CodeGen::Node next) const {
160 if (next == CodeGen::kNullNode) {
161 return Hash::kZero;
163 auto it = node_hashes_.find(next);
164 if (it == node_hashes_.end()) {
165 ADD_FAILURE() << "No hash found for node " << next;
166 return Hash::kZero;
168 return it->second;
171 CodeGen gen_;
172 std::map<CodeGen::Node, Hash> node_hashes_;
174 DISALLOW_COPY_AND_ASSIGN(ProgramTest);
177 TEST_F(ProgramTest, OneInstruction) {
178 // Create the most basic valid BPF program:
179 // RET 0
180 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0);
181 RunTest(head);
184 TEST_F(ProgramTest, SimpleBranch) {
185 // Create a program with a single branch:
186 // JUMP if eq 42 then $0 else $1
187 // 0: RET 1
188 // 1: RET 0
189 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42,
190 MakeInstruction(BPF_RET + BPF_K, 1),
191 MakeInstruction(BPF_RET + BPF_K, 0));
192 RunTest(head);
195 TEST_F(ProgramTest, AtypicalBranch) {
196 // Create a program with a single branch:
197 // JUMP if eq 42 then $0 else $0
198 // 0: RET 0
200 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0);
201 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret);
203 // N.B.: As the instructions in both sides of the branch are already
204 // the same object, we do not actually have any "mergeable" branches.
205 // This needs to be reflected in our choice of "flags".
206 RunTest(head);
209 TEST_F(ProgramTest, Complex) {
210 // Creates a basic BPF program that we'll use to test some of the code:
211 // JUMP if eq 42 the $0 else $1 (insn6)
212 // 0: LD 23 (insn5)
213 // 1: JUMP if eq 42 then $2 else $4 (insn4)
214 // 2: JUMP to $3 (insn2)
215 // 3: LD 42 (insn1)
216 // RET 42 (insn0)
217 // 4: LD 42 (insn3)
218 // RET 42 (insn3+)
219 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42);
220 CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0);
221 CodeGen::Node insn2 = insn1; // Implicit JUMP
223 // We explicitly duplicate instructions to test that they're merged.
224 CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42,
225 MakeInstruction(BPF_RET + BPF_K, 42));
226 EXPECT_EQ(insn2, insn3);
228 CodeGen::Node insn4 =
229 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3);
230 CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4);
232 // Force a basic block that ends in neither a jump instruction nor a return
233 // instruction. It only contains "insn5". This exercises one of the less
234 // common code paths in the topo-sort algorithm.
235 // This also gives us a diamond-shaped pattern in our graph, which stresses
236 // another aspect of the topo-sort algorithm (namely, the ability to
237 // correctly count the incoming branches for subtrees that are not disjunct).
238 CodeGen::Node insn6 =
239 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4);
241 RunTest(insn6);
244 TEST_F(ProgramTest, ConfusingTails) {
245 // This simple program demonstrates https://crbug.com/351103/
246 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
247 // be tempted to merge them since they are the same. However, they are
248 // not mergeable because they fall-through to non semantically equivalent
249 // blocks.
250 // Without the fix for this bug, this program should trigger the check in
251 // CompileAndCompare: the serialized graphs from the program and its compiled
252 // version will differ.
254 // 0) LOAD 1 // ???
255 // 1) if A == 0x1; then JMP 2 else JMP 3
256 // 2) LOAD 0 // System call number
257 // 3) if A == 0x2; then JMP 4 else JMP 5
258 // 4) LOAD 0 // System call number
259 // 5) if A == 0x1; then JMP 6 else JMP 7
260 // 6) RET 0
261 // 7) RET 1
263 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
264 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
265 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
266 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
267 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
268 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
269 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
270 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
272 RunTest(i0);
275 TEST_F(ProgramTest, ConfusingTailsBasic) {
276 // Without the fix for https://crbug.com/351103/, (see
277 // SampleProgramConfusingTails()), this would generate a cyclic graph and
278 // crash as the two "LOAD 0" instructions would get merged.
280 // 0) LOAD 1 // ???
281 // 1) if A == 0x1; then JMP 2 else JMP 3
282 // 2) LOAD 0 // System call number
283 // 3) if A == 0x2; then JMP 4 else JMP 5
284 // 4) LOAD 0 // System call number
285 // 5) RET 1
287 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1);
288 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
289 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
290 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
291 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
292 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
294 RunTest(i0);
297 TEST_F(ProgramTest, ConfusingTailsMergeable) {
298 // This is similar to SampleProgramConfusingTails(), except that
299 // instructions 2 and 4 are now RET instructions.
300 // In PointerCompare(), this exercises the path where two blocks are of the
301 // same length and identical and the last instruction is a JMP or RET, so the
302 // following blocks don't need to be looked at and the blocks are mergeable.
304 // 0) LOAD 1 // ???
305 // 1) if A == 0x1; then JMP 2 else JMP 3
306 // 2) RET 42
307 // 3) if A == 0x2; then JMP 4 else JMP 5
308 // 4) RET 42
309 // 5) if A == 0x1; then JMP 6 else JMP 7
310 // 6) RET 0
311 // 7) RET 1
313 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
314 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
315 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
316 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42);
317 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
318 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42);
319 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
320 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
322 RunTest(i0);
325 TEST_F(ProgramTest, InstructionFolding) {
326 // Check that simple instructions are folded as expected.
327 CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0);
328 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
329 CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1);
330 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
331 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
332 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
334 // Check that complex sequences are folded too.
335 CodeGen::Node c =
336 MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0,
337 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b));
338 EXPECT_EQ(c, MakeInstruction(
339 BPF_LD + BPF_W + BPF_ABS, 0,
340 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)));
342 RunTest(c);
345 TEST_F(ProgramTest, FarBranches) {
346 // BPF instructions use 8-bit fields for branch offsets, which means
347 // branch targets must be within 255 instructions of the branch
348 // instruction. CodeGen abstracts away this detail by inserting jump
349 // instructions as needed, which we test here by generating programs
350 // that should trigger any interesting boundary conditions.
352 // Populate with 260 initial instruction nodes.
353 std::vector<CodeGen::Node> nodes;
354 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
355 for (size_t i = 1; i < 260; ++i) {
356 nodes.push_back(
357 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
360 // Exhaustively test branch offsets near BPF's limits.
361 for (size_t jt = 250; jt < 260; ++jt) {
362 for (size_t jf = 250; jf < 260; ++jf) {
363 nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0,
364 nodes.rbegin()[jt], nodes.rbegin()[jf]));
365 RunTest(nodes.back());
370 TEST_F(ProgramTest, JumpReuse) {
371 // As a code size optimization, we try to reuse jumps when possible
372 // instead of emitting new ones. Here we make sure that optimization
373 // is working as intended.
375 // NOTE: To simplify testing, we rely on implementation details
376 // about what CodeGen::Node values indicate (i.e., vector indices),
377 // but CodeGen users should treat them as opaque values.
379 // Populate with 260 initial instruction nodes.
380 std::vector<CodeGen::Node> nodes;
381 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
382 for (size_t i = 1; i < 260; ++i) {
383 nodes.push_back(
384 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
387 // Branching to nodes[0] and nodes[1] should require 3 new
388 // instructions: two far jumps plus the branch itself.
389 CodeGen::Node one =
390 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]);
391 EXPECT_EQ(nodes.back() + 3, one); // XXX: Implementation detail!
392 RunTest(one);
394 // Branching again to the same target nodes should require only one
395 // new instruction, as we can reuse the previous branch's jumps.
396 CodeGen::Node two =
397 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]);
398 EXPECT_EQ(one + 1, two); // XXX: Implementation detail!
399 RunTest(two);
402 } // namespace
403 } // namespace sandbox