1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
11 #include "sandbox/linux/seccomp-bpf/codegen.h"
12 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
13 #include "sandbox/linux/tests/unit_tests.h"
17 class SandboxUnittestHelper
: public SandboxBPF
{
19 typedef SandboxBPF::Program Program
;
22 // We want to access some of the private methods in the code generator. We
23 // do so by defining a "friend" that makes these methods public for us.
24 class CodeGenUnittestHelper
: public CodeGen
{
26 void FindBranchTargets(const Instruction
& instructions
,
27 BranchTargets
* branch_targets
) {
28 CodeGen::FindBranchTargets(instructions
, branch_targets
);
31 BasicBlock
* CutGraphIntoBasicBlocks(Instruction
* insns
,
32 const BranchTargets
& branch_targets
,
33 TargetsToBlocks
* blocks
) {
34 return CodeGen::CutGraphIntoBasicBlocks(insns
, branch_targets
, blocks
);
37 void MergeTails(TargetsToBlocks
* blocks
) { CodeGen::MergeTails(blocks
); }
40 enum { NO_FLAGS
= 0x0000, HAS_MERGEABLE_TAILS
= 0x0001, };
42 Instruction
* SampleProgramOneInstruction(CodeGen
* codegen
, int* flags
) {
43 // Create the most basic valid BPF program:
46 return codegen
->MakeInstruction(BPF_RET
+ BPF_K
,
47 ErrorCode(ErrorCode::ERR_ALLOWED
));
50 Instruction
* SampleProgramSimpleBranch(CodeGen
* codegen
, int* flags
) {
51 // Create a program with a single branch:
52 // JUMP if eq 42 then $0 else $1
56 return codegen
->MakeInstruction(
57 BPF_JMP
+ BPF_JEQ
+ BPF_K
,
59 codegen
->MakeInstruction(BPF_RET
+ BPF_K
, ErrorCode(EPERM
)),
60 codegen
->MakeInstruction(BPF_RET
+ BPF_K
,
61 ErrorCode(ErrorCode::ERR_ALLOWED
)));
64 Instruction
* SampleProgramAtypicalBranch(CodeGen
* codegen
, int* flags
) {
65 // Create a program with a single branch:
66 // JUMP if eq 42 then $0 else $0
69 // N.B.: As the instructions in both sides of the branch are already
70 // the same object, we do not actually have any "mergeable" branches.
71 // This needs to be reflected in our choice of "flags".
74 Instruction
* ret
= codegen
->MakeInstruction(
75 BPF_RET
+ BPF_K
, ErrorCode(ErrorCode::ERR_ALLOWED
));
76 return codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, ret
, ret
);
79 Instruction
* SampleProgramComplex(CodeGen
* codegen
, int* flags
) {
80 // Creates a basic BPF program that we'll use to test some of the code:
81 // JUMP if eq 42 the $0 else $1 (insn6)
83 // 1: JUMP if eq 42 then $2 else $4 (insn4)
84 // 2: JUMP to $3 (insn1)
86 // RET ErrorCode(42) (insn2)
88 // RET ErrorCode(42) (insn3+)
89 *flags
= HAS_MERGEABLE_TAILS
;
91 Instruction
* insn0
= codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 42);
92 SANDBOX_ASSERT(insn0
);
93 SANDBOX_ASSERT(insn0
->code
== BPF_LD
+ BPF_W
+ BPF_ABS
);
94 SANDBOX_ASSERT(insn0
->k
== 42);
95 SANDBOX_ASSERT(insn0
->next
== NULL
);
97 Instruction
* insn1
= codegen
->MakeInstruction(BPF_JMP
+ BPF_JA
, 0, insn0
);
98 SANDBOX_ASSERT(insn1
);
99 SANDBOX_ASSERT(insn1
->code
== BPF_JMP
+ BPF_JA
);
100 SANDBOX_ASSERT(insn1
->jt_ptr
== insn0
);
102 Instruction
* insn2
= codegen
->MakeInstruction(BPF_RET
+ BPF_K
, ErrorCode(42));
103 SANDBOX_ASSERT(insn2
);
104 SANDBOX_ASSERT(insn2
->code
== BPF_RET
+ BPF_K
);
105 SANDBOX_ASSERT(insn2
->next
== NULL
);
107 // We explicitly duplicate instructions so that MergeTails() can coalesce
109 Instruction
* insn3
= codegen
->MakeInstruction(
110 BPF_LD
+ BPF_W
+ BPF_ABS
,
112 codegen
->MakeInstruction(BPF_RET
+ BPF_K
, ErrorCode(42)));
115 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, insn1
, insn3
);
116 SANDBOX_ASSERT(insn4
);
117 SANDBOX_ASSERT(insn4
->code
== BPF_JMP
+ BPF_JEQ
+ BPF_K
);
118 SANDBOX_ASSERT(insn4
->k
== 42);
119 SANDBOX_ASSERT(insn4
->jt_ptr
== insn1
);
120 SANDBOX_ASSERT(insn4
->jf_ptr
== insn3
);
122 codegen
->JoinInstructions(insn0
, insn2
);
123 SANDBOX_ASSERT(insn0
->next
== insn2
);
126 codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 23, insn4
);
127 SANDBOX_ASSERT(insn5
);
128 SANDBOX_ASSERT(insn5
->code
== BPF_LD
+ BPF_W
+ BPF_ABS
);
129 SANDBOX_ASSERT(insn5
->k
== 23);
130 SANDBOX_ASSERT(insn5
->next
== insn4
);
132 // Force a basic block that ends in neither a jump instruction nor a return
133 // instruction. It only contains "insn5". This exercises one of the less
134 // common code paths in the topo-sort algorithm.
135 // This also gives us a diamond-shaped pattern in our graph, which stresses
136 // another aspect of the topo-sort algorithm (namely, the ability to
137 // correctly count the incoming branches for subtrees that are not disjunct).
139 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 42, insn5
, insn4
);
144 Instruction
* SampleProgramConfusingTails(CodeGen
* codegen
, int* flags
) {
145 // This simple program demonstrates https://crbug.com/351103/
146 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
147 // be tempted to merge them since they are the same. However, they are
148 // not mergeable because they fall-through to non semantically equivalent
150 // Without the fix for this bug, this program should trigger the check in
151 // CompileAndCompare: the serialized graphs from the program and its compiled
152 // version will differ.
155 // 1) if A == 0x1; then JMP 2 else JMP 3
156 // 2) LOAD 0 // System call number
157 // 3) if A == 0x2; then JMP 4 else JMP 5
158 // 4) LOAD 0 // System call number
159 // 5) if A == 0x1; then JMP 6 else JMP 7
160 // 6) RET 0x50000 // errno = 0
161 // 7) RET 0x50001 // errno = 1
164 Instruction
* i7
= codegen
->MakeInstruction(BPF_RET
, ErrorCode(1));
165 Instruction
* i6
= codegen
->MakeInstruction(BPF_RET
, ErrorCode(0));
167 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i6
, i7
);
168 Instruction
* i4
= codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i5
);
170 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
171 Instruction
* i2
= codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i3
);
173 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
174 Instruction
* i0
= codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
179 Instruction
* SampleProgramConfusingTailsBasic(CodeGen
* codegen
, int* flags
) {
180 // Without the fix for https://crbug.com/351103/, (see
181 // SampleProgramConfusingTails()), this would generate a cyclic graph and
182 // crash as the two "LOAD 0" instructions would get merged.
185 // 1) if A == 0x1; then JMP 2 else JMP 3
186 // 2) LOAD 0 // System call number
187 // 3) if A == 0x2; then JMP 4 else JMP 5
188 // 4) LOAD 0 // System call number
189 // 5) RET 0x50001 // errno = 1
192 Instruction
* i5
= codegen
->MakeInstruction(BPF_RET
, ErrorCode(1));
193 Instruction
* i4
= codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i5
);
195 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
196 Instruction
* i2
= codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 0, i3
);
198 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
199 Instruction
* i0
= codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
204 Instruction
* SampleProgramConfusingTailsMergeable(CodeGen
* codegen
,
206 // This is similar to SampleProgramConfusingTails(), except that
207 // instructions 2 and 4 are now RET instructions.
208 // In PointerCompare(), this exercises the path where two blocks are of the
209 // same length and identical and the last instruction is a JMP or RET, so the
210 // following blocks don't need to be looked at and the blocks are mergeable.
213 // 1) if A == 0x1; then JMP 2 else JMP 3
214 // 2) RET 0x5002a // errno = 42
215 // 3) if A == 0x2; then JMP 4 else JMP 5
216 // 4) RET 0x5002a // errno = 42
217 // 5) if A == 0x1; then JMP 6 else JMP 7
218 // 6) RET 0x50000 // errno = 0
219 // 7) RET 0x50001 // errno = 1
220 *flags
= HAS_MERGEABLE_TAILS
;
222 Instruction
* i7
= codegen
->MakeInstruction(BPF_RET
, ErrorCode(1));
223 Instruction
* i6
= codegen
->MakeInstruction(BPF_RET
, ErrorCode(0));
225 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i6
, i7
);
226 Instruction
* i4
= codegen
->MakeInstruction(BPF_RET
, ErrorCode(42));
228 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 2, i4
, i5
);
229 Instruction
* i2
= codegen
->MakeInstruction(BPF_RET
, ErrorCode(42));
231 codegen
->MakeInstruction(BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, i2
, i3
);
232 Instruction
* i0
= codegen
->MakeInstruction(BPF_LD
+ BPF_W
+ BPF_ABS
, 1, i1
);
236 void ForAllPrograms(void (*test
)(CodeGenUnittestHelper
*, Instruction
*, int)) {
237 Instruction
* (*function_table
[])(CodeGen
* codegen
, int* flags
) = {
238 SampleProgramOneInstruction
,
239 SampleProgramSimpleBranch
,
240 SampleProgramAtypicalBranch
,
241 SampleProgramComplex
,
242 SampleProgramConfusingTails
,
243 SampleProgramConfusingTailsBasic
,
244 SampleProgramConfusingTailsMergeable
,
247 for (size_t i
= 0; i
< arraysize(function_table
); ++i
) {
248 CodeGenUnittestHelper codegen
;
249 int flags
= NO_FLAGS
;
250 Instruction
*prg
= function_table
[i
](&codegen
, &flags
);
251 test(&codegen
, prg
, flags
);
255 void MakeInstruction(CodeGenUnittestHelper
* codegen
,
256 Instruction
* program
, int) {
257 // Nothing to do here
260 SANDBOX_TEST(CodeGen
, MakeInstruction
) {
261 ForAllPrograms(MakeInstruction
);
264 void FindBranchTargets(CodeGenUnittestHelper
* codegen
, Instruction
* prg
, int) {
265 BranchTargets branch_targets
;
266 codegen
->FindBranchTargets(*prg
, &branch_targets
);
268 // Verifying the general properties that should be true for every
269 // well-formed BPF program.
270 // Perform a depth-first traversal of the BPF program an verify that all
271 // targets of BPF_JMP instructions are represented in the "branch_targets".
272 // At the same time, compute a set of both the branch targets and all the
273 // instructions in the program.
274 std::vector
<Instruction
*> stack
;
275 std::set
<Instruction
*> all_instructions
;
276 std::set
<Instruction
*> target_instructions
;
277 BranchTargets::const_iterator end
= branch_targets
.end();
278 for (Instruction
* insn
= prg
;;) {
279 all_instructions
.insert(insn
);
280 if (BPF_CLASS(insn
->code
) == BPF_JMP
) {
281 target_instructions
.insert(insn
->jt_ptr
);
282 SANDBOX_ASSERT(insn
->jt_ptr
!= NULL
);
283 SANDBOX_ASSERT(branch_targets
.find(insn
->jt_ptr
) != end
);
284 if (BPF_OP(insn
->code
) != BPF_JA
) {
285 target_instructions
.insert(insn
->jf_ptr
);
286 SANDBOX_ASSERT(insn
->jf_ptr
!= NULL
);
287 SANDBOX_ASSERT(branch_targets
.find(insn
->jf_ptr
) != end
);
288 stack
.push_back(insn
->jf_ptr
);
291 } else if (BPF_CLASS(insn
->code
) == BPF_RET
) {
292 SANDBOX_ASSERT(insn
->next
== NULL
);
299 SANDBOX_ASSERT(insn
->next
!= NULL
);
303 SANDBOX_ASSERT(target_instructions
.size() == branch_targets
.size());
305 // We can now subtract the set of the branch targets from the set of all
306 // instructions. This gives us a set with the instructions that nobody
307 // ever jumps to. Verify that they are no included in the
308 // "branch_targets" that FindBranchTargets() computed for us.
309 Instructions
non_target_instructions(all_instructions
.size() -
310 target_instructions
.size());
311 set_difference(all_instructions
.begin(),
312 all_instructions
.end(),
313 target_instructions
.begin(),
314 target_instructions
.end(),
315 non_target_instructions
.begin());
316 for (Instructions::const_iterator iter
= non_target_instructions
.begin();
317 iter
!= non_target_instructions
.end();
319 SANDBOX_ASSERT(branch_targets
.find(*iter
) == end
);
323 SANDBOX_TEST(CodeGen
, FindBranchTargets
) { ForAllPrograms(FindBranchTargets
); }
325 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper
* codegen
,
328 BranchTargets branch_targets
;
329 codegen
->FindBranchTargets(*prg
, &branch_targets
);
330 TargetsToBlocks all_blocks
;
331 BasicBlock
* first_block
=
332 codegen
->CutGraphIntoBasicBlocks(prg
, branch_targets
, &all_blocks
);
333 SANDBOX_ASSERT(first_block
!= NULL
);
334 SANDBOX_ASSERT(first_block
->instructions
.size() > 0);
335 Instruction
* first_insn
= first_block
->instructions
[0];
337 // Basic blocks are supposed to start with a branch target and end with
338 // either a jump or a return instruction. It can also end, if the next
339 // instruction forms the beginning of a new basic block. There should be
340 // no other jumps or return instructions in the middle of a basic block.
341 for (TargetsToBlocks::const_iterator bb_iter
= all_blocks
.begin();
342 bb_iter
!= all_blocks
.end();
344 BasicBlock
* bb
= bb_iter
->second
;
345 SANDBOX_ASSERT(bb
!= NULL
);
346 SANDBOX_ASSERT(bb
->instructions
.size() > 0);
347 Instruction
* insn
= bb
->instructions
[0];
348 SANDBOX_ASSERT(insn
== first_insn
||
349 branch_targets
.find(insn
) != branch_targets
.end());
350 for (Instructions::const_iterator insn_iter
= bb
->instructions
.begin();;) {
352 if (++insn_iter
!= bb
->instructions
.end()) {
353 SANDBOX_ASSERT(BPF_CLASS(insn
->code
) != BPF_JMP
);
354 SANDBOX_ASSERT(BPF_CLASS(insn
->code
) != BPF_RET
);
356 SANDBOX_ASSERT(BPF_CLASS(insn
->code
) == BPF_JMP
||
357 BPF_CLASS(insn
->code
) == BPF_RET
||
358 branch_targets
.find(insn
->next
) != branch_targets
.end());
361 SANDBOX_ASSERT(branch_targets
.find(*insn_iter
) == branch_targets
.end());
366 SANDBOX_TEST(CodeGen
, CutGraphIntoBasicBlocks
) {
367 ForAllPrograms(CutGraphIntoBasicBlocks
);
370 void MergeTails(CodeGenUnittestHelper
* codegen
, Instruction
* prg
, int flags
) {
371 BranchTargets branch_targets
;
372 codegen
->FindBranchTargets(*prg
, &branch_targets
);
373 TargetsToBlocks all_blocks
;
374 BasicBlock
* first_block
=
375 codegen
->CutGraphIntoBasicBlocks(prg
, branch_targets
, &all_blocks
);
377 // The shape of our graph and thus the function of our program should
378 // still be unchanged after we run MergeTails(). We verify this by
379 // serializing the graph and verifying that it is still the same.
380 // We also verify that at least some of the edges changed because of
382 std::string graph
[2];
383 std::string edges
[2];
385 // The loop executes twice. After the first run, we call MergeTails() on
388 // Traverse the entire program in depth-first order.
389 std::vector
<BasicBlock
*> stack
;
390 for (BasicBlock
* bb
= first_block
;;) {
391 // Serialize the instructions in this basic block. In general, we only
392 // need to serialize "code" and "k"; except for a BPF_JA instruction
393 // where "k" isn't set.
394 // The stream of instructions should be unchanged after MergeTails().
395 for (Instructions::const_iterator iter
= bb
->instructions
.begin();
396 iter
!= bb
->instructions
.end();
398 graph
[i
].append(reinterpret_cast<char*>(&(*iter
)->code
),
399 sizeof((*iter
)->code
));
400 if (BPF_CLASS((*iter
)->code
) != BPF_JMP
||
401 BPF_OP((*iter
)->code
) != BPF_JA
) {
402 graph
[i
].append(reinterpret_cast<char*>(&(*iter
)->k
),
407 // Also serialize the addresses the basic blocks as we encounter them.
408 // This will change as basic blocks are coalesed by MergeTails().
409 edges
[i
].append(reinterpret_cast<char*>(&bb
), sizeof(bb
));
411 // Depth-first traversal of the graph. We only ever need to look at the
412 // very last instruction in the basic block, as that is the only one that
413 // can change code flow.
414 Instruction
* insn
= bb
->instructions
.back();
415 if (BPF_CLASS(insn
->code
) == BPF_JMP
) {
416 // For jump instructions, we need to remember the "false" branch while
417 // traversing the "true" branch. This is not necessary for BPF_JA which
418 // only has a single branch.
419 if (BPF_OP(insn
->code
) != BPF_JA
) {
420 stack
.push_back(all_blocks
[insn
->jf_ptr
]);
422 bb
= all_blocks
[insn
->jt_ptr
];
423 } else if (BPF_CLASS(insn
->code
) == BPF_RET
) {
424 // After a BPF_RET, see if we need to back track.
431 // For "normal" instructions, just follow to the next basic block.
432 bb
= all_blocks
[insn
->next
];
436 // Our loop runs exactly two times.
440 codegen
->MergeTails(&all_blocks
);
442 SANDBOX_ASSERT(graph
[0] == graph
[1]);
443 if (flags
& HAS_MERGEABLE_TAILS
) {
444 SANDBOX_ASSERT(edges
[0] != edges
[1]);
446 SANDBOX_ASSERT(edges
[0] == edges
[1]);
450 SANDBOX_TEST(CodeGen
, MergeTails
) {
451 ForAllPrograms(MergeTails
);
454 void CompileAndCompare(CodeGenUnittestHelper
* codegen
, Instruction
* prg
, int) {
455 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it
456 // detects a problem. Typically, if anything goes wrong, this looks to the
457 // TopoSort algorithm as if there had been cycles in the input data.
458 // This provides a pretty good unittest.
459 // We hand-crafted the program returned by SampleProgram() to exercise
460 // several of the more interesting code-paths. See comments in
461 // SampleProgram() for details.
462 // In addition to relying on the internal consistency checks in the compiler,
463 // we also serialize the graph and the resulting BPF program and compare
464 // them. With the exception of BPF_JA instructions that might have been
465 // inserted, both instruction streams should be equivalent.
466 // As Compile() modifies the instructions, we have to serialize the graph
467 // before calling Compile().
469 Instructions source_stack
;
470 for (const Instruction
* insn
= prg
, *next
; insn
; insn
= next
) {
471 if (BPF_CLASS(insn
->code
) == BPF_JMP
) {
472 if (BPF_OP(insn
->code
) == BPF_JA
) {
473 // Do not serialize BPF_JA instructions (see above).
477 source_stack
.push_back(insn
->jf_ptr
);
480 } else if (BPF_CLASS(insn
->code
) == BPF_RET
) {
481 if (source_stack
.empty()) {
484 next
= source_stack
.back();
485 source_stack
.pop_back();
490 // Only serialize "code" and "k". That's all the information we need to
491 // compare. The rest of the information is encoded in the order of
493 source
.append(reinterpret_cast<const char*>(&insn
->code
),
495 source
.append(reinterpret_cast<const char*>(&insn
->k
), sizeof(insn
->k
));
498 // Compile the program
499 SandboxUnittestHelper::Program bpf
;
500 codegen
->Compile(prg
, &bpf
);
502 // Serialize the resulting BPF instructions.
503 std::string assembly
;
504 std::vector
<int> assembly_stack
;
505 for (int idx
= 0; idx
>= 0;) {
506 SANDBOX_ASSERT(idx
< (int)bpf
.size());
507 struct sock_filter
& insn
= bpf
[idx
];
508 if (BPF_CLASS(insn
.code
) == BPF_JMP
) {
509 if (BPF_OP(insn
.code
) == BPF_JA
) {
510 // Do not serialize BPF_JA instructions (see above).
514 assembly_stack
.push_back(idx
+ insn
.jf
+ 1);
517 } else if (BPF_CLASS(insn
.code
) == BPF_RET
) {
518 if (assembly_stack
.empty()) {
521 idx
= assembly_stack
.back();
522 assembly_stack
.pop_back();
527 // Serialize the same information that we serialized before compilation.
528 assembly
.append(reinterpret_cast<char*>(&insn
.code
), sizeof(insn
.code
));
529 assembly
.append(reinterpret_cast<char*>(&insn
.k
), sizeof(insn
.k
));
531 SANDBOX_ASSERT(source
== assembly
);
534 SANDBOX_TEST(CodeGen
, All
) {
535 ForAllPrograms(CompileAndCompare
);
538 } // namespace sandbox