1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "sandbox/linux/bpf_dsl/codegen.h"
11 #include "base/macros.h"
13 #include "base/strings/string_piece.h"
14 #include "sandbox/linux/system_headers/linux_filter.h"
15 #include "testing/gtest/include/gtest/gtest.h"
20 // Hash provides an abstraction for building "hash trees" from BPF
21 // control flow graphs, and efficiently identifying equivalent graphs.
23 // For simplicity, we use MD5, because base happens to provide a
24 // convenient API for its use. However, any collision-resistant hash
28 static const Hash kZero
;
34 const Hash
& jt
= kZero
,
35 const Hash
& jf
= kZero
)
39 HashValue(&ctx
, code
);
43 base::MD5Final(&digest_
, &ctx
);
46 Hash(const Hash
& hash
) = default;
47 Hash
& operator=(const Hash
& rhs
) = default;
49 friend bool operator==(const Hash
& lhs
, const Hash
& rhs
) {
50 return lhs
.Base16() == rhs
.Base16();
52 friend bool operator!=(const Hash
& lhs
, const Hash
& rhs
) {
58 void HashValue(base::MD5Context
* ctx
, const T
& value
) {
60 base::StringPiece(reinterpret_cast<const char*>(&value
),
64 std::string
Base16() const {
65 return MD5DigestToBase16(digest_
);
68 base::MD5Digest digest_
;
71 const Hash
Hash::kZero
;
73 // Sanity check that equality and inequality work on Hash as required.
74 TEST(CodeGen
, HashSanity
) {
75 std::vector
<Hash
> hashes
;
77 // Push a bunch of logically distinct hashes.
78 hashes
.push_back(Hash::kZero
);
79 for (int i
= 0; i
< 4; ++i
) {
80 hashes
.push_back(Hash(i
& 1, i
& 2));
82 for (int i
= 0; i
< 16; ++i
) {
83 hashes
.push_back(Hash(i
& 1, i
& 2, Hash(i
& 4, i
& 8)));
85 for (int i
= 0; i
< 64; ++i
) {
87 Hash(i
& 1, i
& 2, Hash(i
& 4, i
& 8), Hash(i
& 16, i
& 32)));
90 for (const Hash
& a
: hashes
) {
91 for (const Hash
& b
: hashes
) {
92 // Hashes should equal themselves, but not equal all others.
102 // ProgramTest provides a fixture for writing compiling sample
103 // programs with CodeGen and verifying the linearized output matches
105 class ProgramTest
: public ::testing::Test
{
107 ProgramTest() : gen_(), node_hashes_() {}
109 // MakeInstruction calls CodeGen::MakeInstruction() and associated
110 // the returned address with a hash of the instruction.
111 CodeGen::Node
MakeInstruction(uint16_t code
,
113 CodeGen::Node jt
= CodeGen::kNullNode
,
114 CodeGen::Node jf
= CodeGen::kNullNode
) {
115 CodeGen::Node res
= gen_
.MakeInstruction(code
, k
, jt
, jf
);
116 EXPECT_NE(CodeGen::kNullNode
, res
);
118 Hash
digest(code
, k
, Lookup(jt
), Lookup(jf
));
119 auto it
= node_hashes_
.insert(std::make_pair(res
, digest
));
120 EXPECT_EQ(digest
, it
.first
->second
);
125 // RunTest compiles the program and verifies that the output matches
126 // what is expected. It should be called at the end of each program
128 void RunTest(CodeGen::Node head
) {
129 // Compile the program
130 CodeGen::Program program
;
131 gen_
.Compile(head
, &program
);
133 // Walk the program backwards, and compute the hash for each instruction.
134 std::vector
<Hash
> prog_hashes(program
.size());
135 for (size_t i
= program
.size(); i
> 0; --i
) {
136 const sock_filter
& insn
= program
.at(i
- 1);
137 Hash
& hash
= prog_hashes
.at(i
- 1);
139 if (BPF_CLASS(insn
.code
) == BPF_JMP
) {
140 if (BPF_OP(insn
.code
) == BPF_JA
) {
141 // The compiler adds JA instructions as needed, so skip them.
142 hash
= prog_hashes
.at(i
+ insn
.k
);
144 hash
= Hash(insn
.code
, insn
.k
, prog_hashes
.at(i
+ insn
.jt
),
145 prog_hashes
.at(i
+ insn
.jf
));
147 } else if (BPF_CLASS(insn
.code
) == BPF_RET
) {
148 hash
= Hash(insn
.code
, insn
.k
);
150 hash
= Hash(insn
.code
, insn
.k
, prog_hashes
.at(i
));
154 EXPECT_EQ(Lookup(head
), prog_hashes
.at(0));
158 const Hash
& Lookup(CodeGen::Node next
) const {
159 if (next
== CodeGen::kNullNode
) {
162 auto it
= node_hashes_
.find(next
);
163 if (it
== node_hashes_
.end()) {
164 ADD_FAILURE() << "No hash found for node " << next
;
171 std::map
<CodeGen::Node
, Hash
> node_hashes_
;
173 DISALLOW_COPY_AND_ASSIGN(ProgramTest
);
176 TEST_F(ProgramTest
, OneInstruction
) {
177 // Create the most basic valid BPF program:
179 CodeGen::Node head
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
183 TEST_F(ProgramTest
, SimpleBranch
) {
184 // Create a program with a single branch:
185 // JUMP if eq 42 then $0 else $1
188 CodeGen::Node head
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42,
189 MakeInstruction(BPF_RET
+ BPF_K
, 1),
190 MakeInstruction(BPF_RET
+ BPF_K
, 0));
194 TEST_F(ProgramTest
, AtypicalBranch
) {
195 // Create a program with a single branch:
196 // JUMP if eq 42 then $0 else $0
199 CodeGen::Node ret
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
200 CodeGen::Node head
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, ret
, ret
);
202 // N.B.: As the instructions in both sides of the branch are already
203 // the same object, we do not actually have any "mergeable" branches.
204 // This needs to be reflected in our choice of "flags".
208 TEST_F(ProgramTest
, Complex
) {
209 // Creates a basic BPF program that we'll use to test some of the code:
210 // JUMP if eq 42 the $0 else $1 (insn6)
212 // 1: JUMP if eq 42 then $2 else $4 (insn4)
213 // 2: JUMP to $3 (insn2)
218 CodeGen::Node insn0
= MakeInstruction(BPF_RET
+ BPF_K
, 42);
219 CodeGen::Node insn1
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 42, insn0
);
220 CodeGen::Node insn2
= insn1
; // Implicit JUMP
222 // We explicitly duplicate instructions to test that they're merged.
223 CodeGen::Node insn3
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 42,
224 MakeInstruction(BPF_RET
+ BPF_K
, 42));
225 EXPECT_EQ(insn2
, insn3
);
227 CodeGen::Node insn4
=
228 MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, insn2
, insn3
);
229 CodeGen::Node insn5
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 23, insn4
);
231 // Force a basic block that ends in neither a jump instruction nor a return
232 // instruction. It only contains "insn5". This exercises one of the less
233 // common code paths in the topo-sort algorithm.
234 // This also gives us a diamond-shaped pattern in our graph, which stresses
235 // another aspect of the topo-sort algorithm (namely, the ability to
236 // correctly count the incoming branches for subtrees that are not disjunct).
237 CodeGen::Node insn6
=
238 MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, insn5
, insn4
);
243 TEST_F(ProgramTest
, ConfusingTails
) {
244 // This simple program demonstrates https://crbug.com/351103/
245 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
246 // be tempted to merge them since they are the same. However, they are
247 // not mergeable because they fall-through to non semantically equivalent
249 // Without the fix for this bug, this program should trigger the check in
250 // CompileAndCompare: the serialized graphs from the program and its compiled
251 // version will differ.
254 // 1) if A == 0x1; then JMP 2 else JMP 3
255 // 2) LOAD 0 // System call number
256 // 3) if A == 0x2; then JMP 4 else JMP 5
257 // 4) LOAD 0 // System call number
258 // 5) if A == 0x1; then JMP 6 else JMP 7
262 CodeGen::Node i7
= MakeInstruction(BPF_RET
+ BPF_K
, 1);
263 CodeGen::Node i6
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
264 CodeGen::Node i5
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i6
, i7
);
265 CodeGen::Node i4
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i5
);
266 CodeGen::Node i3
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
267 CodeGen::Node i2
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i3
);
268 CodeGen::Node i1
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
269 CodeGen::Node i0
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
274 TEST_F(ProgramTest
, ConfusingTailsBasic
) {
275 // Without the fix for https://crbug.com/351103/, (see
276 // SampleProgramConfusingTails()), this would generate a cyclic graph and
277 // crash as the two "LOAD 0" instructions would get merged.
280 // 1) if A == 0x1; then JMP 2 else JMP 3
281 // 2) LOAD 0 // System call number
282 // 3) if A == 0x2; then JMP 4 else JMP 5
283 // 4) LOAD 0 // System call number
286 CodeGen::Node i5
= MakeInstruction(BPF_RET
+ BPF_K
, 1);
287 CodeGen::Node i4
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i5
);
288 CodeGen::Node i3
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
289 CodeGen::Node i2
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i3
);
290 CodeGen::Node i1
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
291 CodeGen::Node i0
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
296 TEST_F(ProgramTest
, ConfusingTailsMergeable
) {
297 // This is similar to SampleProgramConfusingTails(), except that
298 // instructions 2 and 4 are now RET instructions.
299 // In PointerCompare(), this exercises the path where two blocks are of the
300 // same length and identical and the last instruction is a JMP or RET, so the
301 // following blocks don't need to be looked at and the blocks are mergeable.
304 // 1) if A == 0x1; then JMP 2 else JMP 3
306 // 3) if A == 0x2; then JMP 4 else JMP 5
308 // 5) if A == 0x1; then JMP 6 else JMP 7
312 CodeGen::Node i7
= MakeInstruction(BPF_RET
+ BPF_K
, 1);
313 CodeGen::Node i6
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
314 CodeGen::Node i5
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i6
, i7
);
315 CodeGen::Node i4
= MakeInstruction(BPF_RET
+ BPF_K
, 42);
316 CodeGen::Node i3
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
317 CodeGen::Node i2
= MakeInstruction(BPF_RET
+ BPF_K
, 42);
318 CodeGen::Node i1
= MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
319 CodeGen::Node i0
= MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
324 TEST_F(ProgramTest
, InstructionFolding
) {
325 // Check that simple instructions are folded as expected.
326 CodeGen::Node a
= MakeInstruction(BPF_RET
+ BPF_K
, 0);
327 EXPECT_EQ(a
, MakeInstruction(BPF_RET
+ BPF_K
, 0));
328 CodeGen::Node b
= MakeInstruction(BPF_RET
+ BPF_K
, 1);
329 EXPECT_EQ(a
, MakeInstruction(BPF_RET
+ BPF_K
, 0));
330 EXPECT_EQ(b
, MakeInstruction(BPF_RET
+ BPF_K
, 1));
331 EXPECT_EQ(b
, MakeInstruction(BPF_RET
+ BPF_K
, 1));
333 // Check that complex sequences are folded too.
335 MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0,
336 MakeInstruction(BPF_JMP
+ BPF_JSET
+ BPF_K
, 0x100, a
, b
));
337 EXPECT_EQ(c
, MakeInstruction(
338 BPF_LD
+ BPF_W
+ BPF_ABS
, 0,
339 MakeInstruction(BPF_JMP
+ BPF_JSET
+ BPF_K
, 0x100, a
, b
)));
344 TEST_F(ProgramTest
, FarBranches
) {
345 // BPF instructions use 8-bit fields for branch offsets, which means
346 // branch targets must be within 255 instructions of the branch
347 // instruction. CodeGen abstracts away this detail by inserting jump
348 // instructions as needed, which we test here by generating programs
349 // that should trigger any interesting boundary conditions.
351 // Populate with 260 initial instruction nodes.
352 std::vector
<CodeGen::Node
> nodes
;
353 nodes
.push_back(MakeInstruction(BPF_RET
+ BPF_K
, 0));
354 for (size_t i
= 1; i
< 260; ++i
) {
356 MakeInstruction(BPF_ALU
+ BPF_ADD
+ BPF_K
, i
, nodes
.back()));
359 // Exhaustively test branch offsets near BPF's limits.
360 for (size_t jt
= 250; jt
< 260; ++jt
) {
361 for (size_t jf
= 250; jf
< 260; ++jf
) {
362 nodes
.push_back(MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 0,
363 nodes
.rbegin()[jt
], nodes
.rbegin()[jf
]));
364 RunTest(nodes
.back());
369 TEST_F(ProgramTest
, JumpReuse
) {
370 // As a code size optimization, we try to reuse jumps when possible
371 // instead of emitting new ones. Here we make sure that optimization
372 // is working as intended.
374 // NOTE: To simplify testing, we rely on implementation details
375 // about what CodeGen::Node values indicate (i.e., vector indices),
376 // but CodeGen users should treat them as opaque values.
378 // Populate with 260 initial instruction nodes.
379 std::vector
<CodeGen::Node
> nodes
;
380 nodes
.push_back(MakeInstruction(BPF_RET
+ BPF_K
, 0));
381 for (size_t i
= 1; i
< 260; ++i
) {
383 MakeInstruction(BPF_ALU
+ BPF_ADD
+ BPF_K
, i
, nodes
.back()));
386 // Branching to nodes[0] and nodes[1] should require 3 new
387 // instructions: two far jumps plus the branch itself.
389 MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 0, nodes
[0], nodes
[1]);
390 EXPECT_EQ(nodes
.back() + 3, one
); // XXX: Implementation detail!
393 // Branching again to the same target nodes should require only one
394 // new instruction, as we can reuse the previous branch's jumps.
396 MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, nodes
[0], nodes
[1]);
397 EXPECT_EQ(one
+ 1, two
); // XXX: Implementation detail!
402 } // namespace sandbox