1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "sandbox/linux/bpf_dsl/codegen.h"
7 #include <linux/filter.h>
13 #include "base/macros.h"
15 #include "base/strings/string_piece.h"
16 #include "testing/gtest/include/gtest/gtest.h"
21 // Hash provides an abstraction for building "hash trees" from BPF
22 // control flow graphs, and efficiently identifying equivalent graphs.
24 // For simplicity, we use MD5, because base happens to provide a
25 // convenient API for its use. However, any collision-resistant hash
29 static const Hash kZero
;
35 const Hash
& jt
= kZero
,
36 const Hash
& jf
= kZero
)
40 HashValue(&ctx
, code
);
44 base::MD5Final(&digest_
, &ctx
);
47 Hash(const Hash
& hash
) = default;
48 Hash
& operator=(const Hash
& rhs
) = default;
50 friend bool operator==(const Hash
& lhs
, const Hash
& rhs
) {
51 return lhs
.Base16() == rhs
.Base16();
53 friend bool operator!=(const Hash
& lhs
, const Hash
& rhs
) {
59 void HashValue(base::MD5Context
* ctx
, const T
& value
) {
61 base::StringPiece(reinterpret_cast<const char*>(&value
),
65 std::string
Base16() const {
66 return MD5DigestToBase16(digest_
);
69 base::MD5Digest digest_
;
72 const Hash
Hash::kZero
;
74 // Sanity check that equality and inequality work on Hash as required.
75 TEST(CodeGen
, HashSanity
) {
76 std::vector
<Hash
> hashes
;
78 // Push a bunch of logically distinct hashes.
79 hashes
.push_back(Hash::kZero
);
80 for (int i
= 0; i
< 4; ++i
) {
81 hashes
.push_back(Hash(i
& 1, i
& 2));
83 for (int i
= 0; i
< 16; ++i
) {
84 hashes
.push_back(Hash(i
& 1, i
& 2, Hash(i
& 4, i
& 8)));
86 for (int i
= 0; i
< 64; ++i
) {
88 Hash(i
& 1, i
& 2, Hash(i
& 4, i
& 8), Hash(i
& 16, i
& 32)));
91 for (const Hash
& a
: hashes
) {
92 for (const Hash
& b
: hashes
) {
93 // Hashes should equal themselves, but not equal all others.
103 // ProgramTest provides a fixture for writing compiling sample
104 // programs with CodeGen and verifying the linearized output matches
106 class ProgramTest
: public ::testing::Test
{
108 ProgramTest() : gen_(), node_hashes_() {}
110 // MakeInstruction calls CodeGen::MakeInstruction() and associated
111 // the returned address with a hash of the instruction.
112 CodeGen::Node
MakeInstruction(uint16_t code
,
114 CodeGen::Node jt
= CodeGen::kNullNode
,
115 CodeGen::Node jf
= CodeGen::kNullNode
) {
116 CodeGen::Node res
= gen_
.MakeInstruction(code
, k
, jt
, jf
);
117 EXPECT_NE(CodeGen::kNullNode
, res
);
119 Hash
digest(code
, k
, Lookup(jt
), Lookup(jf
));
120 auto it
= node_hashes_
.insert(std::make_pair(res
, digest
));
121 EXPECT_EQ(digest
, it
.first
->second
);
126 // RunTest compiles the program and verifies that the output matches
127 // what is expected. It should be called at the end of each program
129 void RunTest(CodeGen::Node head
) {
130 // Compile the program
131 CodeGen::Program program
;
132 gen_
.Compile(head
, &program
);
134 // Walk the program backwards, and compute the hash for each instruction.
135 std::vector
<Hash
> prog_hashes(program
.size());
136 for (size_t i
= program
.size(); i
> 0; --i
) {
137 const sock_filter
& insn
= program
.at(i
- 1);
138 Hash
& hash
= prog_hashes
.at(i
- 1);
140 if (BPF_CLASS(insn
.code
) == BPF_JMP
) {
141 if (BPF_OP(insn
.code
) == BPF_JA
) {
142 // The compiler adds JA instructions as needed, so skip them.
143 hash
= prog_hashes
.at(i
+ insn
.k
);
145 hash
= Hash(insn
.code
, insn
.k
, prog_hashes
.at(i
+ insn
.jt
),
146 prog_hashes
.at(i
+ insn
.jf
));
148 } else if (BPF_CLASS(insn
.code
) == BPF_RET
) {
149 hash
= Hash(insn
.code
, insn
.k
);
151 hash
= Hash(insn
.code
, insn
.k
, prog_hashes
.at(i
));
155 EXPECT_EQ(Lookup(head
), prog_hashes
.at(0));
159 const Hash
& Lookup(CodeGen::Node next
) const {
160 if (next
== CodeGen::kNullNode
) {
163 auto it
= node_hashes_
.find(next
);
164 if (it
== node_hashes_
.end()) {
165 ADD_FAILURE() << "No hash found for node " << next
;
172 std::map
<CodeGen::Node
, Hash
> node_hashes_
;
174 DISALLOW_COPY_AND_ASSIGN(ProgramTest
);
177 TEST_F(ProgramTest
, OneInstruction
) {
178 // Create the most basic valid BPF program:
180 CodeGen::Node head
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
184 TEST_F(ProgramTest
, SimpleBranch
) {
185 // Create a program with a single branch:
186 // JUMP if eq 42 then $0 else $1
189 CodeGen::Node head
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42,
190 MakeInstruction(BPF_RET
+ BPF_K
, 1),
191 MakeInstruction(BPF_RET
+ BPF_K
, 0));
195 TEST_F(ProgramTest
, AtypicalBranch
) {
196 // Create a program with a single branch:
197 // JUMP if eq 42 then $0 else $0
200 CodeGen::Node ret
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
201 CodeGen::Node head
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, ret
, ret
);
203 // N.B.: As the instructions in both sides of the branch are already
204 // the same object, we do not actually have any "mergeable" branches.
205 // This needs to be reflected in our choice of "flags".
209 TEST_F(ProgramTest
, Complex
) {
210 // Creates a basic BPF program that we'll use to test some of the code:
211 // JUMP if eq 42 the $0 else $1 (insn6)
213 // 1: JUMP if eq 42 then $2 else $4 (insn4)
214 // 2: JUMP to $3 (insn2)
219 CodeGen::Node insn0
= MakeInstruction(BPF_RET
+ BPF_K
, 42);
220 CodeGen::Node insn1
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 42, insn0
);
221 CodeGen::Node insn2
= insn1
; // Implicit JUMP
223 // We explicitly duplicate instructions to test that they're merged.
224 CodeGen::Node insn3
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 42,
225 MakeInstruction(BPF_RET
+ BPF_K
, 42));
226 EXPECT_EQ(insn2
, insn3
);
228 CodeGen::Node insn4
=
229 MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, insn2
, insn3
);
230 CodeGen::Node insn5
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 23, insn4
);
232 // Force a basic block that ends in neither a jump instruction nor a return
233 // instruction. It only contains "insn5". This exercises one of the less
234 // common code paths in the topo-sort algorithm.
235 // This also gives us a diamond-shaped pattern in our graph, which stresses
236 // another aspect of the topo-sort algorithm (namely, the ability to
237 // correctly count the incoming branches for subtrees that are not disjunct).
238 CodeGen::Node insn6
=
239 MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, insn5
, insn4
);
244 TEST_F(ProgramTest
, ConfusingTails
) {
245 // This simple program demonstrates https://crbug.com/351103/
246 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
247 // be tempted to merge them since they are the same. However, they are
248 // not mergeable because they fall-through to non semantically equivalent
250 // Without the fix for this bug, this program should trigger the check in
251 // CompileAndCompare: the serialized graphs from the program and its compiled
252 // version will differ.
255 // 1) if A == 0x1; then JMP 2 else JMP 3
256 // 2) LOAD 0 // System call number
257 // 3) if A == 0x2; then JMP 4 else JMP 5
258 // 4) LOAD 0 // System call number
259 // 5) if A == 0x1; then JMP 6 else JMP 7
263 CodeGen::Node i7
= MakeInstruction(BPF_RET
+ BPF_K
, 1);
264 CodeGen::Node i6
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
265 CodeGen::Node i5
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i6
, i7
);
266 CodeGen::Node i4
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i5
);
267 CodeGen::Node i3
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
268 CodeGen::Node i2
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i3
);
269 CodeGen::Node i1
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
270 CodeGen::Node i0
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
275 TEST_F(ProgramTest
, ConfusingTailsBasic
) {
276 // Without the fix for https://crbug.com/351103/, (see
277 // SampleProgramConfusingTails()), this would generate a cyclic graph and
278 // crash as the two "LOAD 0" instructions would get merged.
281 // 1) if A == 0x1; then JMP 2 else JMP 3
282 // 2) LOAD 0 // System call number
283 // 3) if A == 0x2; then JMP 4 else JMP 5
284 // 4) LOAD 0 // System call number
287 CodeGen::Node i5
= MakeInstruction(BPF_RET
+ BPF_K
, 1);
288 CodeGen::Node i4
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i5
);
289 CodeGen::Node i3
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
290 CodeGen::Node i2
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i3
);
291 CodeGen::Node i1
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
292 CodeGen::Node i0
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
297 TEST_F(ProgramTest
, ConfusingTailsMergeable
) {
298 // This is similar to SampleProgramConfusingTails(), except that
299 // instructions 2 and 4 are now RET instructions.
300 // In PointerCompare(), this exercises the path where two blocks are of the
301 // same length and identical and the last instruction is a JMP or RET, so the
302 // following blocks don't need to be looked at and the blocks are mergeable.
305 // 1) if A == 0x1; then JMP 2 else JMP 3
307 // 3) if A == 0x2; then JMP 4 else JMP 5
309 // 5) if A == 0x1; then JMP 6 else JMP 7
313 CodeGen::Node i7
= MakeInstruction(BPF_RET
+ BPF_K
, 1);
314 CodeGen::Node i6
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
315 CodeGen::Node i5
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i6
, i7
);
316 CodeGen::Node i4
= MakeInstruction(BPF_RET
+ BPF_K
, 42);
317 CodeGen::Node i3
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
318 CodeGen::Node i2
= MakeInstruction(BPF_RET
+ BPF_K
, 42);
319 CodeGen::Node i1
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
320 CodeGen::Node i0
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
325 TEST_F(ProgramTest
, InstructionFolding
) {
326 // Check that simple instructions are folded as expected.
327 CodeGen::Node a
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
328 EXPECT_EQ(a
, MakeInstruction(BPF_RET
+ BPF_K
, 0));
329 CodeGen::Node b
= MakeInstruction(BPF_RET
+ BPF_K
, 1);
330 EXPECT_EQ(a
, MakeInstruction(BPF_RET
+ BPF_K
, 0));
331 EXPECT_EQ(b
, MakeInstruction(BPF_RET
+ BPF_K
, 1));
332 EXPECT_EQ(b
, MakeInstruction(BPF_RET
+ BPF_K
, 1));
334 // Check that complex sequences are folded too.
336 MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0,
337 MakeInstruction(BPF_JMP
+ BPF_JSET
+ BPF_K
, 0x100, a
, b
));
338 EXPECT_EQ(c
, MakeInstruction(
339 BPF_LD
+ BPF_W
+ BPF_ABS
, 0,
340 MakeInstruction(BPF_JMP
+ BPF_JSET
+ BPF_K
, 0x100, a
, b
)));
345 TEST_F(ProgramTest
, FarBranches
) {
346 // BPF instructions use 8-bit fields for branch offsets, which means
347 // branch targets must be within 255 instructions of the branch
348 // instruction. CodeGen abstracts away this detail by inserting jump
349 // instructions as needed, which we test here by generating programs
350 // that should trigger any interesting boundary conditions.
352 // Populate with 260 initial instruction nodes.
353 std::vector
<CodeGen::Node
> nodes
;
354 nodes
.push_back(MakeInstruction(BPF_RET
+ BPF_K
, 0));
355 for (size_t i
= 1; i
< 260; ++i
) {
357 MakeInstruction(BPF_ALU
+ BPF_ADD
+ BPF_K
, i
, nodes
.back()));
360 // Exhaustively test branch offsets near BPF's limits.
361 for (size_t jt
= 250; jt
< 260; ++jt
) {
362 for (size_t jf
= 250; jf
< 260; ++jf
) {
363 nodes
.push_back(MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 0,
364 nodes
.rbegin()[jt
], nodes
.rbegin()[jf
]));
365 RunTest(nodes
.back());
370 TEST_F(ProgramTest
, JumpReuse
) {
371 // As a code size optimization, we try to reuse jumps when possible
372 // instead of emitting new ones. Here we make sure that optimization
373 // is working as intended.
375 // NOTE: To simplify testing, we rely on implementation details
376 // about what CodeGen::Node values indicate (i.e., vector indices),
377 // but CodeGen users should treat them as opaque values.
379 // Populate with 260 initial instruction nodes.
380 std::vector
<CodeGen::Node
> nodes
;
381 nodes
.push_back(MakeInstruction(BPF_RET
+ BPF_K
, 0));
382 for (size_t i
= 1; i
< 260; ++i
) {
384 MakeInstruction(BPF_ALU
+ BPF_ADD
+ BPF_K
, i
, nodes
.back()));
387 // Branching to nodes[0] and nodes[1] should require 3 new
388 // instructions: two far jumps plus the branch itself.
390 MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 0, nodes
[0], nodes
[1]);
391 EXPECT_EQ(nodes
.back() + 3, one
); // XXX: Implementation detail!
394 // Branching again to the same target nodes should require only one
395 // new instruction, as we can reuse the previous branch's jumps.
397 MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, nodes
[0], nodes
[1]);
398 EXPECT_EQ(one
+ 1, two
); // XXX: Implementation detail!
403 } // namespace sandbox