// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stdio.h>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/codegen.h"

namespace {
// Helper function for Traverse().
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {
CodeGen::CodeGen() : compiled_(false) {}

CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}
void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(stderr,
                    "Argument %d (%cSB)\n",
                    (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                    (iter->k - offsetof(struct arch_seccomp_data, args)) % 8
                        ? 'M' : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
                  BPF_OP(iter->code) == BPF_JSET ? "&" :
                  BPF_OP(iter->code) == BPF_JEQ ? "==" :
                  BPF_OP(iter->code) == BPF_JGE ? ">=" :
                  BPF_OP(iter->code) == BPF_JGT ? ">" : "???",
                  (int)iter->k,
                  ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
                    ? "A := -A\n" : "A := A %s 0x%x\n",
                BPF_OP(iter->code) == BPF_ADD ? "+" :
                BPF_OP(iter->code) == BPF_SUB ? "-" :
                BPF_OP(iter->code) == BPF_MUL ? "*" :
                BPF_OP(iter->code) == BPF_DIV ? "/" :
                BPF_OP(iter->code) == BPF_MOD ? "%" :
                BPF_OP(iter->code) == BPF_OR  ? "|" :
                BPF_OP(iter->code) == BPF_XOR ? "^" :
                BPF_OP(iter->code) == BPF_AND ? "&" :
                BPF_OP(iter->code) == BPF_LSH ? "<<" :
                BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
                (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
}
Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // "JoinInstructions" later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP instruction");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use the "true" branch target, only.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

Instruction* CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
  if (BPF_CLASS(code) != BPF_RET) {
    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
  }
  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
      err.error_type_ != ErrorCode::ET_TRAP) {
    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
  }
  return MakeInstruction(code, err.err_);
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt && !jf) {
    // We allow callers to defer specifying exactly one of the branch
    // targets. It must then be set later by calling "JoinInstructions".
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}

void CodeGen::JoinInstructions(Instruction* head, Instruction* tail) {
  // Merge two instructions, or set the branch target for an "always" jump.
  // This function should be called if the caller didn't initially provide
  // a value for "next" when creating the instruction.
  if (BPF_CLASS(head->code) == BPF_JMP) {
    if (BPF_OP(head->code) == BPF_JA) {
      if (head->jt_ptr) {
        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
      }
      head->jt_ptr = tail;
    } else {
      if (!head->jt_ptr && head->jf_ptr) {
        head->jt_ptr = tail;
      } else if (!head->jf_ptr && head->jt_ptr) {
        head->jf_ptr = tail;
      } else {
        SANDBOX_DIE("Cannot append instructions after a jump");
      }
    }
  } else if (BPF_CLASS(head->code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  } else if (head->next) {
    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
  } else {
    head->next = tail;
  }
}

void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
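  //
  // Illustrative example: for a conditional jump such as
  //
  //   if (nr == __NR_exit) goto allow; else goto deny;
  //
  // both the "allow" and the "deny" instructions become keys in
  // "branch_targets", and the mapped values count how many jumps lead to
  // each of them. Instructions that are only ever reached by fall-through
  // never show up in the map.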
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase count of incoming edges for each of the jump
      // targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive descent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node, backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
}

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
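  //
  // Illustrative example: in the instruction sequence
  //
  //   I0: LOAD nr
  //   I1: if nr == X then I2 else I3
  //   I2: RET allow
  //   I3: RET deny
  //
  // the block boundaries are {I0, I1}, {I2} and {I3}: the jump in I1 ends
  // the first block, and I2 and I3 each start a new block because they are
  // branch targets.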
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (this means
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction, this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the call stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction, append it to the current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return NULL;
}

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
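//
// Illustrative example: two distinct blocks that each consist of the single
// instruction "RET SECCOMP_RET_ALLOW" compare equal here, which lets
// MergeTails() below fold them into one shared block. Blocks whose
// instructions or successor blocks differ compare unequal and are left
// untouched.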
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal
      // code and k fields) and their last instructions are neither a JMP nor
      // a RET (which is the only way we can reach this point), then we must
      // compare their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non-jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    if (insn1.k != insn2.k) {
      return insn1.k - insn2.k;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by
    // definition it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields. Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally results in blocks with identical tails of
  // instructions mapping to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  //      a basic block. In practice, this is sufficiently rare that we don't
  //      incur a big cost.
  //      Similarly, we currently don't merge anything other than tails. In
  //      the future, we might decide to revisit this decision and attempt to
  //      merge arbitrary sub-sequences of instructions.
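  //
  // Illustrative example: if two policy branches both end in a block whose
  // only instruction is "RET SECCOMP_RET_ALLOW", both entries in "blocks"
  // end up pointing at a single shared BasicBlock, and the duplicate block
  // is simply never emitted into the final program.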
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work reliably.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
  // ends up making our descendants eligible for the sorted list. The
  // sorting algorithm terminates when there are no more basic blocks that have
  // no incoming branches. If we didn't move all blocks from the set of
  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
  // been a cyclic dependency. This should never happen in a BPF program, as
  // well-formed BPF programs only ever have forward branches.
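  //
  // Illustrative example: with blocks A -> {B, C} and B -> C, only A starts
  // out with zero incoming branches. Emitting A decrements the counts for B
  // and C; B then reaches zero and is emitted next; finally C reaches zero.
  // The resulting order A, B, C guarantees that every jump goes forward,
  // which is what classic BPF requires.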
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect last instruction in the basic block. This is typically either a
    // jump or a return statement. But it could also be a "normal" instruction
    // that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into the set of "head" nodes that no longer have any incoming
      // branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means iterating over the basic blocks in reverse makes
  // it trivial to compute the correct offsets.
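  //
  // Illustrative example: "offset" counts instructions from the end of the
  // program. If the code following a jump is 10 instructions from the end
  // (offset == 10) and the target block starts 4 instructions from the end
  // (its offset == 4), the encoded relative jump is 10 - 4 = 6, i.e. skip
  // the six instructions in between.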
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32bit "k" field for the offset, instead
        // of the 8bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
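        //
        // Illustrative example: if the "true" target is 300 instructions
        // away, the conditional jump cannot encode that distance. We append
        // an unconditional BPF_JA that covers the long hop, point the
        // conditional jump at the immediately following BPF_JA (displacement
        // zero), and bump the other branch by one to account for the
        // inserted instruction.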
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
}

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
}

void CodeGen::Compile(Instruction* instructions,
                      SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
}

}  // namespace sandbox