sandbox/linux/seccomp-bpf/codegen.cc

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stdio.h>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/codegen.h"

namespace {

// Helper function for Traverse().
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}
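
// The code generator owns every Instruction and BasicBlock it handed out;
// they are released here.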
CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}
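
// Debugging helper: writes a human-readable disassembly of "program" to
// stderr.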
void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
                  BPF_OP(iter->code) == BPF_JSET ? "&" :
                  BPF_OP(iter->code) == BPF_JEQ ? "==" :
                  BPF_OP(iter->code) == BPF_JGE ? ">=" :
                  BPF_OP(iter->code) == BPF_JGT ? ">" : "???",
                  (int)iter->k,
                  ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
                ? "A := -A\n" : "A := A %s 0x%x\n",
                BPF_OP(iter->code) == BPF_ADD ? "+" :
                BPF_OP(iter->code) == BPF_SUB ? "-" :
                BPF_OP(iter->code) == BPF_MUL ? "*" :
                BPF_OP(iter->code) == BPF_DIV ? "/" :
                BPF_OP(iter->code) == BPF_MOD ? "%" :
                BPF_OP(iter->code) == BPF_OR ? "|" :
                BPF_OP(iter->code) == BPF_XOR ? "^" :
                BPF_OP(iter->code) == BPF_AND ? "&" :
                BPF_OP(iter->code) == BPF_LSH ? "<<" :
                BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
                (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
  return;
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // "JoinInstructions" later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use only the "true" branch target.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

Instruction* CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
  if (BPF_CLASS(code) != BPF_RET) {
    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
  }
  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
      err.error_type_ != ErrorCode::ET_TRAP) {
    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
  }
  return MakeInstruction(code, err.err_);
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt && !jf) {
    // We allow callers to defer specifying exactly one of the branch
    // targets. It must then be set later by calling "JoinInstructions".
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}
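
// The MakeInstruction() overloads above create the nodes of the instruction
// graph; JoinInstructions() below links up any "next" or branch targets that
// were deferred at creation time. Compile() at the end of this file turns the
// finished graph into a loadable BPF program.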
void CodeGen::JoinInstructions(Instruction* head, Instruction* tail) {
  // Merge two instructions, or set the branch target for an "always" jump.
  // This function should be called if the caller didn't initially provide
  // a value for "next" when creating the instruction.
  if (BPF_CLASS(head->code) == BPF_JMP) {
    if (BPF_OP(head->code) == BPF_JA) {
      if (head->jt_ptr) {
        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
      }
      head->jt_ptr = tail;
    } else {
      if (!head->jt_ptr && head->jf_ptr) {
        head->jt_ptr = tail;
      } else if (!head->jf_ptr && head->jt_ptr) {
        head->jf_ptr = tail;
      } else {
        SANDBOX_DIE("Cannot append instructions after a jump");
      }
    }
  } else if (BPF_CLASS(head->code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  } else if (head->next) {
    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
  } else {
    head->next = tail;
  }
  return;
}
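
// Calls "fnc(insn, aux)" exactly once for every instruction reachable from
// "instruction", in no particular order.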
void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}
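
// Fills "branch_targets" with an entry for every instruction that is the
// target of a jump, counting the number of incoming jump edges for each.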
void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase count of incoming edges for each of the jump
      // targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive descent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node, backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
  return;
}

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
  return;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (this means,
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction, this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction, append it to current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;

  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal code
      // and k fields) and their last instructions are neither a JMP nor a RET
      // (which is the only way we can reach this point), then we must compare
      // their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    if (insn1.k != insn2.k) {
      return insn1.k - insn2.k;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by definition
    // it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields. Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally results in blocks with identical tails of
  // instructions to map to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  // a basic block. In practice, this is sufficiently rare that we don't
  // incur a big cost.
  // Similarly, we currently don't merge anything other than tails. In
  // the future, we might decide to revisit this decision and attempt to
  // merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
  // ends up making our descendants eligible for the sorted list. The
  // sorting algorithm terminates when there are no more basic blocks that have
  // no incoming branches. If we didn't move all blocks from the set of
  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
  // been a cyclic dependency. This should never happen in a BPF program, as
  // well-formed BPF programs only ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect last instruction in the basic block. This is typically either a
    // jump or a return statement. But it could also be a "normal" instruction
    // that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into set of "head" nodes that no longer have any incoming branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means, iterating over the basic blocks in reverse makes
  // it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32bit "k" field for the offset, instead
        // of the 8bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
  return;
}

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
  return;
}
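
// Compile() drives the full code-generation pipeline: find all branch
// targets, cut the instruction graph into basic blocks, merge identical
// tails, topologically sort the blocks, turn pointer-based jumps into
// relative offsets, and concatenate the result into "program".
//
// A minimal, illustrative sketch of how a caller might drive this API to
// build "allow getpid(), trap everything else". The policy and constants are
// hypothetical examples and are not taken from this file's actual callers:
//
//   CodeGen gen;
//   Instruction* allow = gen.MakeInstruction(BPF_RET + BPF_K,
//                                            SECCOMP_RET_ALLOW);
//   Instruction* trap = gen.MakeInstruction(BPF_RET + BPF_K,
//                                           SECCOMP_RET_TRAP);
//   Instruction* check = gen.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K,
//                                            __NR_getpid, allow, trap);
//   Instruction* head = gen.MakeInstruction(
//       BPF_LD + BPF_W + BPF_ABS, offsetof(struct arch_seccomp_data, nr),
//       check);
//   SandboxBPF::Program program;
//   gen.Compile(head, &program);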
void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
  return;
}

}  // namespace sandbox