1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
#include "sandbox/linux/bpf_dsl/codegen.h"

#include <stddef.h>
#include <stdint.h>

#include <limits>
#include <utility>

#include "base/logging.h"
#include "sandbox/linux/system_headers/linux_filter.h"
// This CodeGen implementation strives for simplicity while still
// generating acceptable BPF programs under typical usage patterns
// (e.g., by PolicyCompiler).
// The key to its simplicity is that BPF programs only support forward
// jumps/branches, which allows constraining the DAG construction API
// to make instruction nodes immutable. Immutable nodes admit a
// simple greedy approach of emitting new instructions as needed and
// then reusing existing ones that have already been emitted. This
// cleanly avoids any need to compute basic blocks or apply
// topological sorting because the API effectively sorts instructions
// for us (e.g., before MakeInstruction() can be called to emit a
// branch instruction, it must have already been called for each
// branch path).
// This greedy algorithm is not without (theoretical) weakness though:
//
//   1. In the general case, we don't eliminate dead code. If needed,
//      we could trace back through the program in Compile() and elide
//      any unneeded instructions, but in practice we only emit live
//      instructions anyway.
//   2. By not dividing instructions into basic blocks and sorting, we
//      lose an opportunity to move non-branch/non-return instructions
//      adjacent to their successor instructions, which means we might
//      need to emit additional jumps. But in practice, they'll
//      already be nearby as long as callers don't go out of their way
//      to interleave MakeInstruction() calls for unrelated code
//      sequences.
// kBranchRange is the maximum value that can be stored in
// sock_filter's 8-bit jt and jf fields, i.e. the farthest a conditional
// branch can reach without help from an intermediate jump.
const size_t kBranchRange = std::numeric_limits<uint8_t>::max();
49 const CodeGen::Node
CodeGen::kNullNode
;
51 CodeGen::CodeGen() : program_(), equivalent_(), memos_() {
57 void CodeGen::Compile(CodeGen::Node head
, Program
* out
) {
59 out
->assign(program_
.rbegin() + Offset(head
), program_
.rend());
62 CodeGen::Node
CodeGen::MakeInstruction(uint16_t code
,
66 // To avoid generating redundant code sequences, we memoize the
67 // results from AppendInstruction().
68 auto res
= memos_
.insert(std::make_pair(MemoKey(code
, k
, jt
, jf
), kNullNode
));
69 CodeGen::Node
* node
= &res
.first
->second
;
70 if (res
.second
) { // Newly inserted memo entry.
71 *node
= AppendInstruction(code
, k
, jt
, jf
);
76 CodeGen::Node
CodeGen::AppendInstruction(uint16_t code
,
80 if (BPF_CLASS(code
) == BPF_JMP
) {
81 CHECK_NE(BPF_JA
, BPF_OP(code
)) << "CodeGen inserts JAs as needed";
83 // Optimally adding jumps is rather tricky, so we use a quick
84 // approximation: by artificially reducing |jt|'s range, |jt| will
85 // stay within its true range even if we add a jump for |jf|.
86 jt
= WithinRange(jt
, kBranchRange
- 1);
87 jf
= WithinRange(jf
, kBranchRange
);
88 return Append(code
, k
, Offset(jt
), Offset(jf
));
91 CHECK_EQ(kNullNode
, jf
) << "Non-branch instructions shouldn't provide jf";
92 if (BPF_CLASS(code
) == BPF_RET
) {
93 CHECK_EQ(kNullNode
, jt
) << "Return instructions shouldn't provide jt";
95 // For non-branch/non-return instructions, execution always
96 // proceeds to the next instruction; so we need to arrange for
98 jt
= WithinRange(jt
, 0);
99 CHECK_EQ(0U, Offset(jt
)) << "ICE: Failed to setup next instruction";
101 return Append(code
, k
, 0, 0);
104 CodeGen::Node
CodeGen::WithinRange(Node target
, size_t range
) {
105 // Just use |target| if it's already within range.
106 if (Offset(target
) <= range
) {
110 // Alternatively, look for an equivalent instruction within range.
111 if (Offset(equivalent_
.at(target
)) <= range
) {
112 return equivalent_
.at(target
);
115 // Otherwise, fall back to emitting a jump instruction.
116 Node jump
= Append(BPF_JMP
| BPF_JA
, Offset(target
), 0, 0);
117 equivalent_
.at(target
) = jump
;
121 CodeGen::Node
CodeGen::Append(uint16_t code
, uint32_t k
, size_t jt
, size_t jf
) {
122 if (BPF_CLASS(code
) == BPF_JMP
&& BPF_OP(code
) != BPF_JA
) {
123 CHECK_LE(jt
, kBranchRange
);
124 CHECK_LE(jf
, kBranchRange
);
130 CHECK_LT(program_
.size(), static_cast<size_t>(BPF_MAXINSNS
));
131 CHECK_EQ(program_
.size(), equivalent_
.size());
133 Node res
= program_
.size();
134 program_
.push_back(sock_filter
{
135 code
, static_cast<uint8_t>(jt
), static_cast<uint8_t>(jf
), k
});
136 equivalent_
.push_back(res
);
140 size_t CodeGen::Offset(Node target
) const {
141 CHECK_LT(target
, program_
.size()) << "Bogus offset target node";
142 return (program_
.size() - 1) - target
;
145 // TODO(mdempsky): Move into a general base::Tuple helper library.
146 bool CodeGen::MemoKeyLess::operator()(const MemoKey
& lhs
,
147 const MemoKey
& rhs
) const {
148 if (base::get
<0>(lhs
) != base::get
<0>(rhs
))
149 return base::get
<0>(lhs
) < base::get
<0>(rhs
);
150 if (base::get
<1>(lhs
) != base::get
<1>(rhs
))
151 return base::get
<1>(lhs
) < base::get
<1>(rhs
);
152 if (base::get
<2>(lhs
) != base::get
<2>(rhs
))
153 return base::get
<2>(lhs
) < base::get
<2>(rhs
);
154 if (base::get
<3>(lhs
) != base::get
<3>(rhs
))
155 return base::get
<3>(lhs
) < base::get
<3>(rhs
);
159 } // namespace sandbox