sandbox/linux/bpf_dsl/codegen_unittest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "sandbox/linux/bpf_dsl/codegen.h"
   6
   7 #include <map>
   8 #include <utility>
   9 #include <vector>
  10
  11 #include "base/macros.h"
  12 #include "base/md5.h"
  13 #include "base/strings/string_piece.h"
  14 #include "sandbox/linux/system_headers/linux_filter.h"
  15 #include "testing/gtest/include/gtest/gtest.h"
  16
  17 namespace sandbox {
  18 namespace {
  19
  20 // Hash provides an abstraction for building "hash trees" from BPF
  21 // control flow graphs, and efficiently identifying equivalent graphs.
  22 //
  23 // For simplicity, we use MD5, because base happens to provide a
  24 // convenient API for its use. However, any collision-resistant hash
  25 // should suffice.
  26 class Hash {
  27  public:
  28   static const Hash kZero;
  29
  30   Hash() : digest_() {}
  31
  32   Hash(uint16_t code,
  33        uint32_t k,
  34        const Hash& jt = kZero,
  35        const Hash& jf = kZero)
  36       : digest_() {
  37     base::MD5Context ctx;
  38     base::MD5Init(&ctx);
  39     HashValue(&ctx, code);
  40     HashValue(&ctx, k);
  41     HashValue(&ctx, jt);
  42     HashValue(&ctx, jf);
  43     base::MD5Final(&digest_, &ctx);
  44   }
  45
  46   Hash(const Hash& hash) = default;
  47   Hash& operator=(const Hash& rhs) = default;
  48
  49   friend bool operator==(const Hash& lhs, const Hash& rhs) {
  50     return lhs.Base16() == rhs.Base16();
  51   }
  52   friend bool operator!=(const Hash& lhs, const Hash& rhs) {
  53     return !(lhs == rhs);
  54   }
  55
  56  private:
  57   template <typename T>
  58   void HashValue(base::MD5Context* ctx, const T& value) {
  59     base::MD5Update(ctx,
  60                     base::StringPiece(reinterpret_cast<const char*>(&value),
  61                                       sizeof(value)));
  62   }
  63
  64   std::string Base16() const {
  65     return base::MD5DigestToBase16(digest_);
  66   }
  67
  68   base::MD5Digest digest_;
  69 };
  70
  71 const Hash Hash::kZero;
  72
  73 // Sanity check that equality and inequality work on Hash as required.
  74 TEST(CodeGen, HashSanity) {
  75   std::vector<Hash> hashes;
  76
  77   // Push a bunch of logically distinct hashes.
  78   hashes.push_back(Hash::kZero);
  79   for (int i = 0; i < 4; ++i) {
  80     hashes.push_back(Hash(i & 1, i & 2));
  81   }
  82   for (int i = 0; i < 16; ++i) {
  83     hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8)));
  84   }
  85   for (int i = 0; i < 64; ++i) {
  86     hashes.push_back(
  87         Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32)));
  88   }
  89
  90   for (const Hash& a : hashes) {
  91     for (const Hash& b : hashes) {
  92       // Hashes should equal themselves, but not equal all others.
  93       if (&a == &b) {
  94         EXPECT_EQ(a, b);
  95       } else {
  96         EXPECT_NE(a, b);
  97       }
  98     }
  99   }
 100 }
 101
 102 // ProgramTest provides a fixture for writing compiling sample
 103 // programs with CodeGen and verifying the linearized output matches
 104 // the input DAG.
 105 class ProgramTest : public ::testing::Test {
 106  protected:
 107   ProgramTest() : gen_(), node_hashes_() {}
 108
 109   // MakeInstruction calls CodeGen::MakeInstruction() and associated
 110   // the returned address with a hash of the instruction.
 111   CodeGen::Node MakeInstruction(uint16_t code,
 112                                 uint32_t k,
 113                                 CodeGen::Node jt = CodeGen::kNullNode,
 114                                 CodeGen::Node jf = CodeGen::kNullNode) {
 115     CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf);
 116     EXPECT_NE(CodeGen::kNullNode, res);
 117
 118     Hash digest(code, k, Lookup(jt), Lookup(jf));
 119     auto it = node_hashes_.insert(std::make_pair(res, digest));
 120     EXPECT_EQ(digest, it.first->second);
 121
 122     return res;
 123   }
 124
 125   // RunTest compiles the program and verifies that the output matches
 126   // what is expected.  It should be called at the end of each program
 127   // test case.
 128   void RunTest(CodeGen::Node head) {
 129     // Compile the program
 130     CodeGen::Program program;
 131     gen_.Compile(head, &program);
 132
 133     // Walk the program backwards, and compute the hash for each instruction.
 134     std::vector<Hash> prog_hashes(program.size());
 135     for (size_t i = program.size(); i > 0; --i) {
 136       const sock_filter& insn = program.at(i - 1);
 137       Hash& hash = prog_hashes.at(i - 1);
 138
 139       if (BPF_CLASS(insn.code) == BPF_JMP) {
 140         if (BPF_OP(insn.code) == BPF_JA) {
 141           // The compiler adds JA instructions as needed, so skip them.
 142           hash = prog_hashes.at(i + insn.k);
 143         } else {
 144           hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt),
 145                       prog_hashes.at(i + insn.jf));
 146         }
 147       } else if (BPF_CLASS(insn.code) == BPF_RET) {
 148         hash = Hash(insn.code, insn.k);
 149       } else {
 150         hash = Hash(insn.code, insn.k, prog_hashes.at(i));
 151       }
 152     }
 153
 154     EXPECT_EQ(Lookup(head), prog_hashes.at(0));
 155   }
 156
 157  private:
 158   const Hash& Lookup(CodeGen::Node next) const {
 159     if (next == CodeGen::kNullNode) {
 160       return Hash::kZero;
 161     }
 162     auto it = node_hashes_.find(next);
 163     if (it == node_hashes_.end()) {
 164       ADD_FAILURE() << "No hash found for node " << next;
 165       return Hash::kZero;
 166     }
 167     return it->second;
 168   }
 169
 170   CodeGen gen_;
 171   std::map<CodeGen::Node, Hash> node_hashes_;
 172
 173   DISALLOW_COPY_AND_ASSIGN(ProgramTest);
 174 };
 175
 176 TEST_F(ProgramTest, OneInstruction) {
 177   // Create the most basic valid BPF program:
 178   //    RET 0
 179   CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0);
 180   RunTest(head);
 181 }
 182
 183 TEST_F(ProgramTest, SimpleBranch) {
 184   // Create a program with a single branch:
 185   //    JUMP if eq 42 then $0 else $1
 186   // 0: RET 1
 187   // 1: RET 0
 188   CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42,
 189                                        MakeInstruction(BPF_RET + BPF_K, 1),
 190                                        MakeInstruction(BPF_RET + BPF_K, 0));
 191   RunTest(head);
 192 }
 193
 194 TEST_F(ProgramTest, AtypicalBranch) {
 195   // Create a program with a single branch:
 196   //    JUMP if eq 42 then $0 else $0
 197   // 0: RET 0
 198
 199   CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0);
 200   CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret);
 201
 202   // N.B.: As the instructions in both sides of the branch are already
 203   //       the same object, we do not actually have any "mergeable" branches.
 204   //       This needs to be reflected in our choice of "flags".
 205   RunTest(head);
 206 }
 207
 208 TEST_F(ProgramTest, Complex) {
 209   // Creates a basic BPF program that we'll use to test some of the code:
 210   //    JUMP if eq 42 the $0 else $1     (insn6)
 211   // 0: LD 23                            (insn5)
 212   // 1: JUMP if eq 42 then $2 else $4    (insn4)
 213   // 2: JUMP to $3                       (insn2)
 214   // 3: LD 42                            (insn1)
 215   //    RET 42                           (insn0)
 216   // 4: LD 42                            (insn3)
 217   //    RET 42                           (insn3+)
 218   CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42);
 219   CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0);
 220   CodeGen::Node insn2 = insn1;  // Implicit JUMP
 221
 222   // We explicitly duplicate instructions to test that they're merged.
 223   CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42,
 224                                         MakeInstruction(BPF_RET + BPF_K, 42));
 225   EXPECT_EQ(insn2, insn3);
 226
 227   CodeGen::Node insn4 =
 228       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3);
 229   CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4);
 230
 231   // Force a basic block that ends in neither a jump instruction nor a return
 232   // instruction. It only contains "insn5". This exercises one of the less
 233   // common code paths in the topo-sort algorithm.
 234   // This also gives us a diamond-shaped pattern in our graph, which stresses
 235   // another aspect of the topo-sort algorithm (namely, the ability to
 236   // correctly count the incoming branches for subtrees that are not disjunct).
 237   CodeGen::Node insn6 =
 238       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4);
 239
 240   RunTest(insn6);
 241 }
 242
 243 TEST_F(ProgramTest, ConfusingTails) {
 244   // This simple program demonstrates https://crbug.com/351103/
 245   // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
 246   // be tempted to merge them since they are the same. However, they are
 247   // not mergeable because they fall-through to non semantically equivalent
 248   // blocks.
 249   // Without the fix for this bug, this program should trigger the check in
 250   // CompileAndCompare: the serialized graphs from the program and its compiled
 251   // version will differ.
 252   //
 253   //  0) LOAD 1  // ???
 254   //  1) if A == 0x1; then JMP 2 else JMP 3
 255   //  2) LOAD 0  // System call number
 256   //  3) if A == 0x2; then JMP 4 else JMP 5
 257   //  4) LOAD 0  // System call number
 258   //  5) if A == 0x1; then JMP 6 else JMP 7
 259   //  6) RET 0
 260   //  7) RET 1
 261
 262   CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
 263   CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
 264   CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
 265   CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
 266   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
 267   CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
 268   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
 269   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
 270
 271   RunTest(i0);
 272 }
 273
 274 TEST_F(ProgramTest, ConfusingTailsBasic) {
 275   // Without the fix for https://crbug.com/351103/, (see
 276   // SampleProgramConfusingTails()), this would generate a cyclic graph and
 277   // crash as the two "LOAD 0" instructions would get merged.
 278   //
 279   // 0) LOAD 1  // ???
 280   // 1) if A == 0x1; then JMP 2 else JMP 3
 281   // 2) LOAD 0  // System call number
 282   // 3) if A == 0x2; then JMP 4 else JMP 5
 283   // 4) LOAD 0  // System call number
 284   // 5) RET 1
 285
 286   CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1);
 287   CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
 288   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
 289   CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
 290   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
 291   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
 292
 293   RunTest(i0);
 294 }
 295
 296 TEST_F(ProgramTest, ConfusingTailsMergeable) {
 297   // This is similar to SampleProgramConfusingTails(), except that
 298   // instructions 2 and 4 are now RET instructions.
 299   // In PointerCompare(), this exercises the path where two blocks are of the
 300   // same length and identical and the last instruction is a JMP or RET, so the
 301   // following blocks don't need to be looked at and the blocks are mergeable.
 302   //
 303   // 0) LOAD 1  // ???
 304   // 1) if A == 0x1; then JMP 2 else JMP 3
 305   // 2) RET 42
 306   // 3) if A == 0x2; then JMP 4 else JMP 5
 307   // 4) RET 42
 308   // 5) if A == 0x1; then JMP 6 else JMP 7
 309   // 6) RET 0
 310   // 7) RET 1
 311
 312   CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
 313   CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
 314   CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
 315   CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42);
 316   CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
 317   CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42);
 318   CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
 319   CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
 320
 321   RunTest(i0);
 322 }
 323
 324 TEST_F(ProgramTest, InstructionFolding) {
 325   // Check that simple instructions are folded as expected.
 326   CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0);
 327   EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
 328   CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1);
 329   EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
 330   EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
 331   EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
 332
 333   // Check that complex sequences are folded too.
 334   CodeGen::Node c =
 335       MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0,
 336                       MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b));
 337   EXPECT_EQ(c, MakeInstruction(
 338                    BPF_LD + BPF_W + BPF_ABS, 0,
 339                    MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)));
 340
 341   RunTest(c);
 342 }
 343
 344 TEST_F(ProgramTest, FarBranches) {
 345   // BPF instructions use 8-bit fields for branch offsets, which means
 346   // branch targets must be within 255 instructions of the branch
 347   // instruction. CodeGen abstracts away this detail by inserting jump
 348   // instructions as needed, which we test here by generating programs
 349   // that should trigger any interesting boundary conditions.
 350
 351   // Populate with 260 initial instruction nodes.
 352   std::vector<CodeGen::Node> nodes;
 353   nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
 354   for (size_t i = 1; i < 260; ++i) {
 355     nodes.push_back(
 356         MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
 357   }
 358
 359   // Exhaustively test branch offsets near BPF's limits.
 360   for (size_t jt = 250; jt < 260; ++jt) {
 361     for (size_t jf = 250; jf < 260; ++jf) {
 362       nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0,
 363                                       nodes.rbegin()[jt], nodes.rbegin()[jf]));
 364       RunTest(nodes.back());
 365     }
 366   }
 367 }
 368
 369 TEST_F(ProgramTest, JumpReuse) {
 370   // As a code size optimization, we try to reuse jumps when possible
 371   // instead of emitting new ones. Here we make sure that optimization
 372   // is working as intended.
 373   //
 374   // NOTE: To simplify testing, we rely on implementation details
 375   // about what CodeGen::Node values indicate (i.e., vector indices),
 376   // but CodeGen users should treat them as opaque values.
 377
 378   // Populate with 260 initial instruction nodes.
 379   std::vector<CodeGen::Node> nodes;
 380   nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
 381   for (size_t i = 1; i < 260; ++i) {
 382     nodes.push_back(
 383         MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
 384   }
 385
 386   // Branching to nodes[0] and nodes[1] should require 3 new
 387   // instructions: two far jumps plus the branch itself.
 388   CodeGen::Node one =
 389       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]);
 390   EXPECT_EQ(nodes.back() + 3, one);  // XXX: Implementation detail!
 391   RunTest(one);
 392
 393   // Branching again to the same target nodes should require only one
 394   // new instruction, as we can reuse the previous branch's jumps.
 395   CodeGen::Node two =
 396       MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]);
 397   EXPECT_EQ(one + 1, two);  // XXX: Implementation detail!
 398   RunTest(two);
 399 }
 400
 401 }  // namespace
 402 }  // namespace sandbox