sandbox/linux/bpf_dsl/codegen.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "sandbox/linux/bpf_dsl/codegen.h"
   6
   7 #include <limits>
   8 #include <utility>
   9
  10 #include "base/logging.h"
  11 #include "sandbox/linux/system_headers/linux_filter.h"
  12
  13 // This CodeGen implementation strives for simplicity while still
  14 // generating acceptable BPF programs under typical usage patterns
  15 // (e.g., by PolicyCompiler).
  16 //
  17 // The key to its simplicity is that BPF programs only support forward
  18 // jumps/branches, which allows constraining the DAG construction API
  19 // to make instruction nodes immutable. Immutable nodes admits a
  20 // simple greedy approach of emitting new instructions as needed and
  21 // then reusing existing ones that have already been emitted. This
  22 // cleanly avoids any need to compute basic blocks or apply
  23 // topological sorting because the API effectively sorts instructions
  24 // for us (e.g., before MakeInstruction() can be called to emit a
  25 // branch instruction, it must have already been called for each
  26 // branch path).
  27 //
  28 // This greedy algorithm is not without (theoretical) weakness though:
  29 //
  30 //   1. In the general case, we don't eliminate dead code.  If needed,
  31 //      we could trace back through the program in Compile() and elide
  32 //      any unneeded instructions, but in practice we only emit live
  33 //      instructions anyway.
  34 //
  35 //   2. By not dividing instructions into basic blocks and sorting, we
  36 //      lose an opportunity to move non-branch/non-return instructions
  37 //      adjacent to their successor instructions, which means we might
  38 //      need to emit additional jumps. But in practice, they'll
  39 //      already be nearby as long as callers don't go out of their way
  40 //      to interleave MakeInstruction() calls for unrelated code
  41 //      sequences.
  42
  43 namespace sandbox {
  44
  45 // kBranchRange is the maximum value that can be stored in
  46 // sock_filter's 8-bit jt and jf fields.
  47 const size_t kBranchRange = std::numeric_limits<uint8_t>::max();
  48
  49 const CodeGen::Node CodeGen::kNullNode;
  50
  51 CodeGen::CodeGen() : program_(), equivalent_(), memos_() {
  52 }
  53
  54 CodeGen::~CodeGen() {
  55 }
  56
  57 void CodeGen::Compile(CodeGen::Node head, Program* out) {
  58   DCHECK(out);
  59   out->assign(program_.rbegin() + Offset(head), program_.rend());
  60 }
  61
  62 CodeGen::Node CodeGen::MakeInstruction(uint16_t code,
  63                                        uint32_t k,
  64                                        Node jt,
  65                                        Node jf) {
  66   // To avoid generating redundant code sequences, we memoize the
  67   // results from AppendInstruction().
  68   auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode));
  69   CodeGen::Node* node = &res.first->second;
  70   if (res.second) {  // Newly inserted memo entry.
  71     *node = AppendInstruction(code, k, jt, jf);
  72   }
  73   return *node;
  74 }
  75
  76 CodeGen::Node CodeGen::AppendInstruction(uint16_t code,
  77                                          uint32_t k,
  78                                          Node jt,
  79                                          Node jf) {
  80   if (BPF_CLASS(code) == BPF_JMP) {
  81     CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed";
  82
  83     // Optimally adding jumps is rather tricky, so we use a quick
  84     // approximation: by artificially reducing |jt|'s range, |jt| will
  85     // stay within its true range even if we add a jump for |jf|.
  86     jt = WithinRange(jt, kBranchRange - 1);
  87     jf = WithinRange(jf, kBranchRange);
  88     return Append(code, k, Offset(jt), Offset(jf));
  89   }
  90
  91   CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf";
  92   if (BPF_CLASS(code) == BPF_RET) {
  93     CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt";
  94   } else {
  95     // For non-branch/non-return instructions, execution always
  96     // proceeds to the next instruction; so we need to arrange for
  97     // that to be |jt|.
  98     jt = WithinRange(jt, 0);
  99     CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction";
 100   }
 101   return Append(code, k, 0, 0);
 102 }
 103
 104 CodeGen::Node CodeGen::WithinRange(Node target, size_t range) {
 105   // Just use |target| if it's already within range.
 106   if (Offset(target) <= range) {
 107     return target;
 108   }
 109
 110   // Alternatively, look for an equivalent instruction within range.
 111   if (Offset(equivalent_.at(target)) <= range) {
 112     return equivalent_.at(target);
 113   }
 114
 115   // Otherwise, fall back to emitting a jump instruction.
 116   Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0);
 117   equivalent_.at(target) = jump;
 118   return jump;
 119 }
 120
 121 CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) {
 122   if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
 123     CHECK_LE(jt, kBranchRange);
 124     CHECK_LE(jf, kBranchRange);
 125   } else {
 126     CHECK_EQ(0U, jt);
 127     CHECK_EQ(0U, jf);
 128   }
 129
 130   CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS));
 131   CHECK_EQ(program_.size(), equivalent_.size());
 132
 133   Node res = program_.size();
 134   program_.push_back(sock_filter{
 135       code, static_cast<uint8_t>(jt), static_cast<uint8_t>(jf), k});
 136   equivalent_.push_back(res);
 137   return res;
 138 }
 139
 140 size_t CodeGen::Offset(Node target) const {
 141   CHECK_LT(target, program_.size()) << "Bogus offset target node";
 142   return (program_.size() - 1) - target;
 143 }
 144
 145 // TODO(mdempsky): Move into a general base::Tuple helper library.
 146 bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs,
 147                                       const MemoKey& rhs) const {
 148   if (base::get<0>(lhs) != base::get<0>(rhs))
 149     return base::get<0>(lhs) < base::get<0>(rhs);
 150   if (base::get<1>(lhs) != base::get<1>(rhs))
 151     return base::get<1>(lhs) < base::get<1>(rhs);
 152   if (base::get<2>(lhs) != base::get<2>(rhs))
 153     return base::get<2>(lhs) < base::get<2>(rhs);
 154   if (base::get<3>(lhs) != base::get<3>(rhs))
 155     return base::get<3>(lhs) < base::get<3>(rhs);
 156   return false;
 157 }
 158
 159 }  // namespace sandbox