sandbox/linux/bpf_dsl/codegen.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
   6 #define SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
   7
   8 #include <stddef.h>
   9 #include <stdint.h>
  10
  11 #include <map>
  12 #include <vector>
  13
  14 #include "base/macros.h"
  15 #include "base/tuple.h"
  16 #include "sandbox/sandbox_export.h"
  17
  18 struct sock_filter;
  19
  20 namespace sandbox {
  21
  22 // The code generator implements a basic assembler that can convert a
  23 // graph of BPF instructions into a well-formed array of BPF
  24 // instructions. Most notably, it ensures that jumps are always
  25 // forward and don't exceed the limit of 255 instructions imposed by
  26 // the instruction set.
  27 //
  28 // Callers would typically create a new CodeGen object and then use it
  29 // to build a DAG of instruction nodes. They'll eventually call
  30 // Compile() to convert this DAG to a Program.
  31 //
  32 //   CodeGen gen;
  33 //   CodeGen::Node allow, branch, dag;
  34 //
  35 //   allow =
  36 //     gen.MakeInstruction(BPF_RET+BPF_K,
  37 //                         ErrorCode(ErrorCode::ERR_ALLOWED).err()));
  38 //   branch =
  39 //     gen.MakeInstruction(BPF_JMP+BPF_EQ+BPF_K, __NR_getpid,
  40 //                         Trap(GetPidHandler, NULL), allow);
  41 //   dag =
  42 //     gen.MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
  43 //                         offsetof(struct arch_seccomp_data, nr), branch);
  44 //
  45 //   // Simplified code follows; in practice, it is important to avoid calling
  46 //   // any C++ destructors after starting the sandbox.
  47 //   CodeGen::Program program;
  48 //   gen.Compile(dag, program);
  49 //   const struct sock_fprog prog = {
  50 //     static_cast<unsigned short>(program->size()), &program[0] };
  51 //   prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
  52 //
  53 class SANDBOX_EXPORT CodeGen {
  54  public:
  55   // A vector of BPF instructions that need to be installed as a filter
  56   // program in the kernel.
  57   typedef std::vector<struct sock_filter> Program;
  58
  59   // Node represents a node within the instruction DAG being compiled.
  60   using Node = Program::size_type;
  61
  62   // kNullNode represents the "null" node; i.e., the reserved node
  63   // value guaranteed to not equal any actual nodes.
  64   static const Node kNullNode = -1;
  65
  66   CodeGen();
  67   ~CodeGen();
  68
  69   // MakeInstruction creates a node representing the specified
  70   // instruction, or returns and existing equivalent node if one
  71   // exists. For details on the possible parameters refer to
  72   // https://www.kernel.org/doc/Documentation/networking/filter.txt.
  73   // TODO(mdempsky): Reconsider using default arguments here.
  74   Node MakeInstruction(uint16_t code,
  75                        uint32_t k,
  76                        Node jt = kNullNode,
  77                        Node jf = kNullNode);
  78
  79   // Compile linearizes the instruction DAG rooted at |head| into a
  80   // program that can be executed by a BPF virtual machine.
  81   void Compile(Node head, Program* program);
  82
  83  private:
  84   using MemoKey = Tuple<uint16_t, uint32_t, Node, Node>;
  85   struct MemoKeyLess {
  86     bool operator()(const MemoKey& lhs, const MemoKey& rhs) const;
  87   };
  88
  89   // AppendInstruction adds a new instruction, ensuring that |jt| and
  90   // |jf| are within range as necessary for |code|.
  91   Node AppendInstruction(uint16_t code, uint32_t k, Node jt, Node jf);
  92
  93   // WithinRange returns a node equivalent to |next| that is at most
  94   // |range| instructions away from the (logical) beginning of the
  95   // program.
  96   Node WithinRange(Node next, size_t range);
  97
  98   // Append appends a new instruction to the physical end (i.e.,
  99   // logical beginning) of |program_|.
 100   Node Append(uint16_t code, uint32_t k, size_t jt, size_t jf);
 101
 102   // Offset returns how many instructions exist in |program_| after |target|.
 103   size_t Offset(Node target) const;
 104
 105   // NOTE: program_ is the compiled program in *reverse*, so that
 106   // indices remain stable as we add instructions.
 107   Program program_;
 108
 109   // equivalent_ stores the most recent semantically-equivalent node for each
 110   // instruction in program_. A node is defined as semantically-equivalent to N
 111   // if it has the same instruction code and constant as N and its successor
 112   // nodes (if any) are semantically-equivalent to N's successor nodes, or
 113   // if it's an unconditional jump to a node semantically-equivalent to N.
 114   std::vector<Node> equivalent_;
 115
 116   std::map<MemoKey, Node, MemoKeyLess> memos_;
 117
 118   DISALLOW_COPY_AND_ASSIGN(CodeGen);
 119 };
 120
 121 }  // namespace sandbox
 122
 123 #endif  // SANDBOX_LINUX_BPF_DSL_CODEGEN_H__