external/bsd/libpcap/dist/optimize.c

   1 /*      $NetBSD: optimize.c,v 1.8 2015/03/31 21:39:42 christos Exp $    */
   2
   3 /*
   4  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that: (1) source code distributions
   9  * retain the above copyright notice and this paragraph in its entirety, (2)
  10  * distributions including binary code include the above copyright notice and
  11  * this paragraph in its entirety in the documentation or other materials
  12  * provided with the distribution, and (3) all advertising materials mentioning
  13  * features or use of this software display the following acknowledgement:
  14  * ``This product includes software developed by the University of California,
  15  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  16  * the University nor the names of its contributors may be used to endorse
  17  * or promote products derived from this software without specific prior
  18  * written permission.
  19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  20  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  21  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  22  *
  23  *  Optimization module for tcpdump intermediate representation.
  24  */
  25
  26 #include <sys/cdefs.h>
  27 __RCSID("$NetBSD: optimize.c,v 1.8 2015/03/31 21:39:42 christos Exp $");
  28
  29 #ifdef HAVE_CONFIG_H
  30 #include "config.h"
  31 #endif
  32
  33 #ifdef WIN32
  34 #include <pcap-stdinc.h>
  35 #else /* WIN32 */
  36 #if HAVE_INTTYPES_H
  37 #include <inttypes.h>
  38 #elif HAVE_STDINT_H
  39 #include <stdint.h>
  40 #endif
  41 #ifdef HAVE_SYS_BITYPES_H
  42 #include <sys/bitypes.h>
  43 #endif
  44 #include <sys/types.h>
  45 #endif /* WIN32 */
  46
  47 #include <stdio.h>
  48 #include <stdlib.h>
  49 #include <memory.h>
  50 #include <string.h>
  51
  52 #include <errno.h>
  53
  54 #include "pcap-int.h"
  55
  56 #include "gencode.h"
  57
  58 #ifdef HAVE_OS_PROTO_H
  59 #include "os-proto.h"
  60 #endif
  61
  62 #ifdef BDEBUG
  63 extern int dflag;
  64 #endif
  65
  66 #if defined(MSDOS) && !defined(__DJGPP__)
  67 extern int _w32_ffs (int mask);
  68 #define ffs _w32_ffs
  69 #endif
  70
  71 #if defined(WIN32) && defined (_MSC_VER)
  72 int ffs(int mask);
  73 #endif
  74
  75 /*
  76  * Represents a deleted instruction.
  77  */
  78 #define NOP -1
  79
  80 /*
  81  * Register numbers for use-def values.
  82  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
  83  * location.  A_ATOM is the accumulator and X_ATOM is the index
  84  * register.
  85  */
  86 #define A_ATOM BPF_MEMWORDS
  87 #define X_ATOM (BPF_MEMWORDS+1)
  88
  89 /*
  90  * This define is used to represent *both* the accumulator and
  91  * x register in use-def computations.
  92  * Currently, the use-def code assumes only one definition per instruction.
  93  */
  94 #define AX_ATOM N_ATOMS
  95
  96 /*
  97  * A flag to indicate that further optimization is needed.
  98  * Iterative passes are continued until a given pass yields no
  99  * branch movement.
 100  */
 101 static int done;
 102
 103 /*
 104  * A block is marked if only if its mark equals the current mark.
 105  * Rather than traverse the code array, marking each item, 'cur_mark' is
 106  * incremented.  This automatically makes each element unmarked.
 107  */
 108 static int cur_mark;
 109 #define isMarked(p) ((p)->mark == cur_mark)
 110 #define unMarkAll() cur_mark += 1
 111 #define Mark(p) ((p)->mark = cur_mark)
 112
 113 static void opt_init(struct block *);
 114 static void opt_cleanup(void);
 115
 116 static void intern_blocks(struct block *);
 117
 118 static void find_inedges(struct block *);
 119 #ifdef BDEBUG
 120 static void opt_dump(struct block *);
 121 #endif
 122
 123 static int n_blocks;
 124 struct block **blocks;
 125 static int n_edges;
 126 struct edge **edges;
 127
 128 /*
 129  * A bit vector set representation of the dominators.
 130  * We round up the set size to the next power of two.
 131  */
 132 static int nodewords;
 133 static int edgewords;
 134 struct block **levels;
 135 bpf_u_int32 *space;
 136 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
 137 /*
 138  * True if a is in uset {p}
 139  */
 140 #define SET_MEMBER(p, a) \
 141 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
 142
 143 /*
 144  * Add 'a' to uset p.
 145  */
 146 #define SET_INSERT(p, a) \
 147 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
 148
 149 /*
 150  * Delete 'a' from uset p.
 151  */
 152 #define SET_DELETE(p, a) \
 153 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
 154
 155 /*
 156  * a := a intersect b
 157  */
 158 #define SET_INTERSECT(a, b, n)\
 159 {\
 160         register bpf_u_int32 *_x = a, *_y = b;\
 161         register int _n = n;\
 162         while (--_n >= 0) *_x++ &= *_y++;\
 163 }
 164
 165 /*
 166  * a := a - b
 167  */
 168 #define SET_SUBTRACT(a, b, n)\
 169 {\
 170         register bpf_u_int32 *_x = a, *_y = b;\
 171         register int _n = n;\
 172         while (--_n >= 0) *_x++ &=~ *_y++;\
 173 }
 174
 175 /*
 176  * a := a union b
 177  */
 178 #define SET_UNION(a, b, n)\
 179 {\
 180         register bpf_u_int32 *_x = a, *_y = b;\
 181         register int _n = n;\
 182         while (--_n >= 0) *_x++ |= *_y++;\
 183 }
 184
 185 static uset all_dom_sets;
 186 static uset all_closure_sets;
 187 static uset all_edge_sets;
 188
 189 #ifndef MAX
 190 #define MAX(a,b) ((a)>(b)?(a):(b))
 191 #endif
 192
 193 static void
 194 find_levels_r(struct block *b)
 195 {
 196         int level;
 197
 198         if (isMarked(b))
 199                 return;
 200
 201         Mark(b);
 202         b->link = 0;
 203
 204         if (JT(b)) {
 205                 find_levels_r(JT(b));
 206                 find_levels_r(JF(b));
 207                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 208         } else
 209                 level = 0;
 210         b->level = level;
 211         b->link = levels[level];
 212         levels[level] = b;
 213 }
 214
 215 /*
 216  * Level graph.  The levels go from 0 at the leaves to
 217  * N_LEVELS at the root.  The levels[] array points to the
 218  * first node of the level list, whose elements are linked
 219  * with the 'link' field of the struct block.
 220  */
 221 static void
 222 find_levels(struct block *root)
 223 {
 224         memset((char *)levels, 0, n_blocks * sizeof(*levels));
 225         unMarkAll();
 226         find_levels_r(root);
 227 }
 228
 229 /*
 230  * Find dominator relationships.
 231  * Assumes graph has been leveled.
 232  */
 233 static void
 234 find_dom(struct block *root)
 235 {
 236         int i;
 237         struct block *b;
 238         bpf_u_int32 *x;
 239
 240         /*
 241          * Initialize sets to contain all nodes.
 242          */
 243         x = all_dom_sets;
 244         i = n_blocks * nodewords;
 245         while (--i >= 0)
 246                 *x++ = ~0;
 247         /* Root starts off empty. */
 248         for (i = nodewords; --i >= 0;)
 249                 root->dom[i] = 0;
 250
 251         /* root->level is the highest level no found. */
 252         for (i = root->level; i >= 0; --i) {
 253                 for (b = levels[i]; b; b = b->link) {
 254                         SET_INSERT(b->dom, b->id);
 255                         if (JT(b) == 0)
 256                                 continue;
 257                         SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
 258                         SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
 259                 }
 260         }
 261 }
 262
 263 static void
 264 propedom(struct edge *ep)
 265 {
 266         SET_INSERT(ep->edom, ep->id);
 267         if (ep->succ) {
 268                 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
 269                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
 270         }
 271 }
 272
 273 /*
 274  * Compute edge dominators.
 275  * Assumes graph has been leveled and predecessors established.
 276  */
 277 static void
 278 find_edom(struct block *root)
 279 {
 280         int i;
 281         uset x;
 282         struct block *b;
 283
 284         x = all_edge_sets;
 285         for (i = n_edges * edgewords; --i >= 0; )
 286                 x[i] = ~0;
 287
 288         /* root->level is the highest level no found. */
 289         memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
 290         memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
 291         for (i = root->level; i >= 0; --i) {
 292                 for (b = levels[i]; b != 0; b = b->link) {
 293                         propedom(&b->et);
 294                         propedom(&b->ef);
 295                 }
 296         }
 297 }
 298
 299 /*
 300  * Find the backwards transitive closure of the flow graph.  These sets
 301  * are backwards in the sense that we find the set of nodes that reach
 302  * a given node, not the set of nodes that can be reached by a node.
 303  *
 304  * Assumes graph has been leveled.
 305  */
 306 static void
 307 find_closure(struct block *root)
 308 {
 309         int i;
 310         struct block *b;
 311
 312         /*
 313          * Initialize sets to contain no nodes.
 314          */
 315         memset((char *)all_closure_sets, 0,
 316               n_blocks * nodewords * sizeof(*all_closure_sets));
 317
 318         /* root->level is the highest level no found. */
 319         for (i = root->level; i >= 0; --i) {
 320                 for (b = levels[i]; b; b = b->link) {
 321                         SET_INSERT(b->closure, b->id);
 322                         if (JT(b) == 0)
 323                                 continue;
 324                         SET_UNION(JT(b)->closure, b->closure, nodewords);
 325                         SET_UNION(JF(b)->closure, b->closure, nodewords);
 326                 }
 327         }
 328 }
 329
 330 /*
 331  * Return the register number that is used by s.  If A and X are both
 332  * used, return AX_ATOM.  If no register is used, return -1.
 333  *
 334  * The implementation should probably change to an array access.
 335  */
 336 static int
 337 atomuse(struct stmt *s)
 338 {
 339         register int c = s->code;
 340
 341         if (c == NOP)
 342                 return -1;
 343
 344         switch (BPF_CLASS(c)) {
 345
 346         case BPF_RET:
 347                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 348                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 349
 350         case BPF_LD:
 351         case BPF_LDX:
 352                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 353                         (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
 354
 355         case BPF_ST:
 356                 return A_ATOM;
 357
 358         case BPF_STX:
 359                 return X_ATOM;
 360
 361         case BPF_JMP:
 362         case BPF_ALU:
 363                 if (BPF_SRC(c) == BPF_X)
 364                         return AX_ATOM;
 365                 return A_ATOM;
 366
 367         case BPF_MISC:
 368                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 369         }
 370         abort();
 371         /* NOTREACHED */
 372 }
 373
 374 /*
 375  * Return the register number that is defined by 's'.  We assume that
 376  * a single stmt cannot define more than one register.  If no register
 377  * is defined, return -1.
 378  *
 379  * The implementation should probably change to an array access.
 380  */
 381 static int
 382 atomdef(struct stmt *s)
 383 {
 384         if (s->code == NOP)
 385                 return -1;
 386
 387         switch (BPF_CLASS(s->code)) {
 388
 389         case BPF_LD:
 390         case BPF_ALU:
 391                 return A_ATOM;
 392
 393         case BPF_LDX:
 394                 return X_ATOM;
 395
 396         case BPF_ST:
 397         case BPF_STX:
 398                 return s->k;
 399
 400         case BPF_MISC:
 401                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 402         }
 403         return -1;
 404 }
 405
 406 /*
 407  * Compute the sets of registers used, defined, and killed by 'b'.
 408  *
 409  * "Used" means that a statement in 'b' uses the register before any
 410  * statement in 'b' defines it, i.e. it uses the value left in
 411  * that register by a predecessor block of this block.
 412  * "Defined" means that a statement in 'b' defines it.
 413  * "Killed" means that a statement in 'b' defines it before any
 414  * statement in 'b' uses it, i.e. it kills the value left in that
 415  * register by a predecessor block of this block.
 416  */
 417 static void
 418 compute_local_ud(struct block *b)
 419 {
 420         struct slist *s;
 421         atomset def = 0, use = 0, kill = 0;
 422         int atom;
 423
 424         for (s = b->stmts; s; s = s->next) {
 425                 if (s->s.code == NOP)
 426                         continue;
 427                 atom = atomuse(&s->s);
 428                 if (atom >= 0) {
 429                         if (atom == AX_ATOM) {
 430                                 if (!ATOMELEM(def, X_ATOM))
 431                                         use |= ATOMMASK(X_ATOM);
 432                                 if (!ATOMELEM(def, A_ATOM))
 433                                         use |= ATOMMASK(A_ATOM);
 434                         }
 435                         else if (atom < N_ATOMS) {
 436                                 if (!ATOMELEM(def, atom))
 437                                         use |= ATOMMASK(atom);
 438                         }
 439                         else
 440                                 abort();
 441                 }
 442                 atom = atomdef(&s->s);
 443                 if (atom >= 0) {
 444                         if (!ATOMELEM(use, atom))
 445                                 kill |= ATOMMASK(atom);
 446                         def |= ATOMMASK(atom);
 447                 }
 448         }
 449         if (BPF_CLASS(b->s.code) == BPF_JMP) {
 450                 /*
 451                  * XXX - what about RET?
 452                  */
 453                 atom = atomuse(&b->s);
 454                 if (atom >= 0) {
 455                         if (atom == AX_ATOM) {
 456                                 if (!ATOMELEM(def, X_ATOM))
 457                                         use |= ATOMMASK(X_ATOM);
 458                                 if (!ATOMELEM(def, A_ATOM))
 459                                         use |= ATOMMASK(A_ATOM);
 460                         }
 461                         else if (atom < N_ATOMS) {
 462                                 if (!ATOMELEM(def, atom))
 463                                         use |= ATOMMASK(atom);
 464                         }
 465                         else
 466                                 abort();
 467                 }
 468         }
 469
 470         b->def = def;
 471         b->kill = kill;
 472         b->in_use = use;
 473 }
 474
 475 /*
 476  * Assume graph is already leveled.
 477  */
 478 static void
 479 find_ud(struct block *root)
 480 {
 481         int i, maxlevel;
 482         struct block *p;
 483
 484         /*
 485          * root->level is the highest level no found;
 486          * count down from there.
 487          */
 488         maxlevel = root->level;
 489         for (i = maxlevel; i >= 0; --i)
 490                 for (p = levels[i]; p; p = p->link) {
 491                         compute_local_ud(p);
 492                         p->out_use = 0;
 493                 }
 494
 495         for (i = 1; i <= maxlevel; ++i) {
 496                 for (p = levels[i]; p; p = p->link) {
 497                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 498                         p->in_use |= p->out_use &~ p->kill;
 499                 }
 500         }
 501 }
 502
 503 /*
 504  * These data structures are used in a Cocke and Shwarz style
 505  * value numbering scheme.  Since the flowgraph is acyclic,
 506  * exit values can be propagated from a node's predecessors
 507  * provided it is uniquely defined.
 508  */
 509 struct valnode {
 510         int code;
 511         int v0, v1;
 512         int val;
 513         struct valnode *next;
 514 };
 515
 516 #define MODULUS 213
 517 static struct valnode *hashtbl[MODULUS];
 518 static int curval;
 519 static int maxval;
 520
 521 /* Integer constants mapped with the load immediate opcode. */
 522 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
 523
 524 struct vmapinfo {
 525         int is_const;
 526         bpf_int32 const_val;
 527 };
 528
 529 struct vmapinfo *vmap;
 530 struct valnode *vnode_base;
 531 struct valnode *next_vnode;
 532
 533 static void
 534 init_val(void)
 535 {
 536         curval = 0;
 537         next_vnode = vnode_base;
 538         memset((char *)vmap, 0, maxval * sizeof(*vmap));
 539         memset((char *)hashtbl, 0, sizeof hashtbl);
 540 }
 541
 542 /* Because we really don't have an IR, this stuff is a little messy. */
 543 static int
 544 F(int code, int v0, int v1)
 545 {
 546         u_int hash;
 547         int val;
 548         struct valnode *p;
 549
 550         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 551         hash %= MODULUS;
 552
 553         for (p = hashtbl[hash]; p; p = p->next)
 554                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 555                         return p->val;
 556
 557         val = ++curval;
 558         if (BPF_MODE(code) == BPF_IMM &&
 559             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 560                 vmap[val].const_val = v0;
 561                 vmap[val].is_const = 1;
 562         }
 563         p = next_vnode++;
 564         p->val = val;
 565         p->code = code;
 566         p->v0 = v0;
 567         p->v1 = v1;
 568         p->next = hashtbl[hash];
 569         hashtbl[hash] = p;
 570
 571         return val;
 572 }
 573
 574 static inline void
 575 vstore(struct stmt *s, int *valp, int newval, int alter)
 576 {
 577         if (alter && *valp == newval)
 578                 s->code = NOP;
 579         else
 580                 *valp = newval;
 581 }
 582
 583 /*
 584  * Do constant-folding on binary operators.
 585  * (Unary operators are handled elsewhere.)
 586  */
 587 static void
 588 fold_op(struct stmt *s, int v0, int v1)
 589 {
 590         bpf_u_int32 a, b;
 591
 592         a = vmap[v0].const_val;
 593         b = vmap[v1].const_val;
 594
 595         switch (BPF_OP(s->code)) {
 596         case BPF_ADD:
 597                 a += b;
 598                 break;
 599
 600         case BPF_SUB:
 601                 a -= b;
 602                 break;
 603
 604         case BPF_MUL:
 605                 a *= b;
 606                 break;
 607
 608         case BPF_DIV:
 609                 if (b == 0)
 610                         bpf_error("division by zero");
 611                 a /= b;
 612                 break;
 613
 614         case BPF_MOD:
 615                 if (b == 0)
 616                         bpf_error("modulus by zero");
 617                 a %= b;
 618                 break;
 619
 620         case BPF_AND:
 621                 a &= b;
 622                 break;
 623
 624         case BPF_OR:
 625                 a |= b;
 626                 break;
 627
 628         case BPF_XOR:
 629                 a ^= b;
 630                 break;
 631
 632         case BPF_LSH:
 633                 a <<= b;
 634                 break;
 635
 636         case BPF_RSH:
 637                 a >>= b;
 638                 break;
 639
 640         default:
 641                 abort();
 642         }
 643         s->k = a;
 644         s->code = BPF_LD|BPF_IMM;
 645         done = 0;
 646 }
 647
 648 static inline struct slist *
 649 this_op(struct slist *s)
 650 {
 651         while (s != 0 && s->s.code == NOP)
 652                 s = s->next;
 653         return s;
 654 }
 655
 656 static void
 657 opt_not(struct block *b)
 658 {
 659         struct block *tmp = JT(b);
 660
 661         JT(b) = JF(b);
 662         JF(b) = tmp;
 663 }
 664
 665 static void
 666 opt_peep(struct block *b)
 667 {
 668         struct slist *s;
 669         struct slist *next, *last;
 670         int val;
 671
 672         s = b->stmts;
 673         if (s == 0)
 674                 return;
 675
 676         last = s;
 677         for (/*empty*/; /*empty*/; s = next) {
 678                 /*
 679                  * Skip over nops.
 680                  */
 681                 s = this_op(s);
 682                 if (s == 0)
 683                         break;  /* nothing left in the block */
 684
 685                 /*
 686                  * Find the next real instruction after that one
 687                  * (skipping nops).
 688                  */
 689                 next = this_op(s->next);
 690                 if (next == 0)
 691                         break;  /* no next instruction */
 692                 last = next;
 693
 694                 /*
 695                  * st  M[k]     -->     st  M[k]
 696                  * ldx M[k]             tax
 697                  */
 698                 if (s->s.code == BPF_ST &&
 699                     next->s.code == (BPF_LDX|BPF_MEM) &&
 700                     s->s.k == next->s.k) {
 701                         done = 0;
 702                         next->s.code = BPF_MISC|BPF_TAX;
 703                 }
 704                 /*
 705                  * ld  #k       -->     ldx  #k
 706                  * tax                  txa
 707                  */
 708                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 709                     next->s.code == (BPF_MISC|BPF_TAX)) {
 710                         s->s.code = BPF_LDX|BPF_IMM;
 711                         next->s.code = BPF_MISC|BPF_TXA;
 712                         done = 0;
 713                 }
 714                 /*
 715                  * This is an ugly special case, but it happens
 716                  * when you say tcp[k] or udp[k] where k is a constant.
 717                  */
 718                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 719                         struct slist *add, *tax, *ild;
 720
 721                         /*
 722                          * Check that X isn't used on exit from this
 723                          * block (which the optimizer might cause).
 724                          * We know the code generator won't generate
 725                          * any local dependencies.
 726                          */
 727                         if (ATOMELEM(b->out_use, X_ATOM))
 728                                 continue;
 729
 730                         /*
 731                          * Check that the instruction following the ldi
 732                          * is an addx, or it's an ldxms with an addx
 733                          * following it (with 0 or more nops between the
 734                          * ldxms and addx).
 735                          */
 736                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 737                                 add = next;
 738                         else
 739                                 add = this_op(next->next);
 740                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 741                                 continue;
 742
 743                         /*
 744                          * Check that a tax follows that (with 0 or more
 745                          * nops between them).
 746                          */
 747                         tax = this_op(add->next);
 748                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 749                                 continue;
 750
 751                         /*
 752                          * Check that an ild follows that (with 0 or more
 753                          * nops between them).
 754                          */
 755                         ild = this_op(tax->next);
 756                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 757                             BPF_MODE(ild->s.code) != BPF_IND)
 758                                 continue;
 759                         /*
 760                          * We want to turn this sequence:
 761                          *
 762                          * (004) ldi     #0x2           {s}
 763                          * (005) ldxms   [14]           {next}  -- optional
 764                          * (006) addx                   {add}
 765                          * (007) tax                    {tax}
 766                          * (008) ild     [x+0]          {ild}
 767                          *
 768                          * into this sequence:
 769                          *
 770                          * (004) nop
 771                          * (005) ldxms   [14]
 772                          * (006) nop
 773                          * (007) nop
 774                          * (008) ild     [x+2]
 775                          *
 776                          * XXX We need to check that X is not
 777                          * subsequently used, because we want to change
 778                          * what'll be in it after this sequence.
 779                          *
 780                          * We know we can eliminate the accumulator
 781                          * modifications earlier in the sequence since
 782                          * it is defined by the last stmt of this sequence
 783                          * (i.e., the last statement of the sequence loads
 784                          * a value into the accumulator, so we can eliminate
 785                          * earlier operations on the accumulator).
 786                          */
 787                         ild->s.k += s->s.k;
 788                         s->s.code = NOP;
 789                         add->s.code = NOP;
 790                         tax->s.code = NOP;
 791                         done = 0;
 792                 }
 793         }
 794         /*
 795          * If the comparison at the end of a block is an equality
 796          * comparison against a constant, and nobody uses the value
 797          * we leave in the A register at the end of a block, and
 798          * the operation preceding the comparison is an arithmetic
 799          * operation, we can sometime optimize it away.
 800          */
 801         if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
 802             !ATOMELEM(b->out_use, A_ATOM)) {
 803                 /*
 804                  * We can optimize away certain subtractions of the
 805                  * X register.
 806                  */
 807                 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
 808                         val = b->val[X_ATOM];
 809                         if (vmap[val].is_const) {
 810                                 /*
 811                                  * If we have a subtract to do a comparison,
 812                                  * and the X register is a known constant,
 813                                  * we can merge this value into the
 814                                  * comparison:
 815                                  *
 816                                  * sub x  ->    nop
 817                                  * jeq #y       jeq #(x+y)
 818                                  */
 819                                 b->s.k += vmap[val].const_val;
 820                                 last->s.code = NOP;
 821                                 done = 0;
 822                         } else if (b->s.k == 0) {
 823                                 /*
 824                                  * If the X register isn't a constant,
 825                                  * and the comparison in the test is
 826                                  * against 0, we can compare with the
 827                                  * X register, instead:
 828                                  *
 829                                  * sub x  ->    nop
 830                                  * jeq #0       jeq x
 831                                  */
 832                                 last->s.code = NOP;
 833                                 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
 834                                 done = 0;
 835                         }
 836                 }
 837                 /*
 838                  * Likewise, a constant subtract can be simplified:
 839                  *
 840                  * sub #x ->    nop
 841                  * jeq #y ->    jeq #(x+y)
 842                  */
 843                 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
 844                         last->s.code = NOP;
 845                         b->s.k += last->s.k;
 846                         done = 0;
 847                 }
 848                 /*
 849                  * And, similarly, a constant AND can be simplified
 850                  * if we're testing against 0, i.e.:
 851                  *
 852                  * and #k       nop
 853                  * jeq #0  ->   jset #k
 854                  */
 855                 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
 856                     b->s.k == 0) {
 857                         b->s.k = last->s.k;
 858                         b->s.code = BPF_JMP|BPF_K|BPF_JSET;
 859                         last->s.code = NOP;
 860                         done = 0;
 861                         opt_not(b);
 862                 }
 863         }
 864         /*
 865          * jset #0        ->   never
 866          * jset #ffffffff ->   always
 867          */
 868         if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
 869                 if (b->s.k == 0)
 870                         JT(b) = JF(b);
 871                 if (b->s.k == (int)0xffffffff)
 872                         JF(b) = JT(b);
 873         }
 874         /*
 875          * If we're comparing against the index register, and the index
 876          * register is a known constant, we can just compare against that
 877          * constant.
 878          */
 879         val = b->val[X_ATOM];
 880         if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
 881                 bpf_int32 v = vmap[val].const_val;
 882                 b->s.code &= ~BPF_X;
 883                 b->s.k = v;
 884         }
 885         /*
 886          * If the accumulator is a known constant, we can compute the
 887          * comparison result.
 888          */
 889         val = b->val[A_ATOM];
 890         if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
 891                 bpf_int32 v = vmap[val].const_val;
 892                 switch (BPF_OP(b->s.code)) {
 893
 894                 case BPF_JEQ:
 895                         v = v == b->s.k;
 896                         break;
 897
 898                 case BPF_JGT:
 899                         v = (unsigned)v > (unsigned)b->s.k;
 900                         break;
 901
 902                 case BPF_JGE:
 903                         v = (unsigned)v >= (unsigned)b->s.k;
 904                         break;
 905
 906                 case BPF_JSET:
 907                         v &= b->s.k;
 908                         break;
 909
 910                 default:
 911                         abort();
 912                 }
 913                 if (JF(b) != JT(b))
 914                         done = 0;
 915                 if (v)
 916                         JF(b) = JT(b);
 917                 else
 918                         JT(b) = JF(b);
 919         }
 920 }
 921
 922 /*
 923  * Compute the symbolic value of expression of 's', and update
 924  * anything it defines in the value table 'val'.  If 'alter' is true,
 925  * do various optimizations.  This code would be cleaner if symbolic
 926  * evaluation and code transformations weren't folded together.
 927  */
 928 static void
 929 opt_stmt(struct stmt *s, int val[], int alter)
 930 {
 931         int op;
 932         int v;
 933
 934         switch (s->code) {
 935
 936         case BPF_LD|BPF_ABS|BPF_W:
 937         case BPF_LD|BPF_ABS|BPF_H:
 938         case BPF_LD|BPF_ABS|BPF_B:
 939                 v = F(s->code, s->k, 0L);
 940                 vstore(s, &val[A_ATOM], v, alter);
 941                 break;
 942
 943         case BPF_LD|BPF_IND|BPF_W:
 944         case BPF_LD|BPF_IND|BPF_H:
 945         case BPF_LD|BPF_IND|BPF_B:
 946                 v = val[X_ATOM];
 947                 if (alter && vmap[v].is_const) {
 948                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
 949                         s->k += vmap[v].const_val;
 950                         v = F(s->code, s->k, 0L);
 951                         done = 0;
 952                 }
 953                 else
 954                         v = F(s->code, s->k, v);
 955                 vstore(s, &val[A_ATOM], v, alter);
 956                 break;
 957
 958         case BPF_LD|BPF_LEN:
 959                 v = F(s->code, 0L, 0L);
 960                 vstore(s, &val[A_ATOM], v, alter);
 961                 break;
 962
 963         case BPF_LD|BPF_IMM:
 964                 v = K(s->k);
 965                 vstore(s, &val[A_ATOM], v, alter);
 966                 break;
 967
 968         case BPF_LDX|BPF_IMM:
 969                 v = K(s->k);
 970                 vstore(s, &val[X_ATOM], v, alter);
 971                 break;
 972
 973         case BPF_LDX|BPF_MSH|BPF_B:
 974                 v = F(s->code, s->k, 0L);
 975                 vstore(s, &val[X_ATOM], v, alter);
 976                 break;
 977
 978         case BPF_ALU|BPF_NEG:
 979                 if (alter && vmap[val[A_ATOM]].is_const) {
 980                         s->code = BPF_LD|BPF_IMM;
 981                         s->k = -vmap[val[A_ATOM]].const_val;
 982                         val[A_ATOM] = K(s->k);
 983                 }
 984                 else
 985                         val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
 986                 break;
 987
 988         case BPF_ALU|BPF_ADD|BPF_K:
 989         case BPF_ALU|BPF_SUB|BPF_K:
 990         case BPF_ALU|BPF_MUL|BPF_K:
 991         case BPF_ALU|BPF_DIV|BPF_K:
 992         case BPF_ALU|BPF_MOD|BPF_K:
 993         case BPF_ALU|BPF_AND|BPF_K:
 994         case BPF_ALU|BPF_OR|BPF_K:
 995         case BPF_ALU|BPF_XOR|BPF_K:
 996         case BPF_ALU|BPF_LSH|BPF_K:
 997         case BPF_ALU|BPF_RSH|BPF_K:
 998                 op = BPF_OP(s->code);
 999                 if (alter) {
1000                         if (s->k == 0) {
1001                                 /* don't optimize away "sub #0"
1002                                  * as it may be needed later to
1003                                  * fixup the generated math code */
1004                                 if (op == BPF_ADD ||
1005                                     op == BPF_LSH || op == BPF_RSH ||
1006                                     op == BPF_OR || op == BPF_XOR) {
1007                                         s->code = NOP;
1008                                         break;
1009                                 }
1010                                 if (op == BPF_MUL || op == BPF_AND) {
1011                                         s->code = BPF_LD|BPF_IMM;
1012                                         val[A_ATOM] = K(s->k);
1013                                         break;
1014                                 }
1015                         }
1016                         if (vmap[val[A_ATOM]].is_const) {
1017                                 fold_op(s, val[A_ATOM], K(s->k));
1018                                 val[A_ATOM] = K(s->k);
1019                                 break;
1020                         }
1021                 }
1022                 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
1023                 break;
1024
1025         case BPF_ALU|BPF_ADD|BPF_X:
1026         case BPF_ALU|BPF_SUB|BPF_X:
1027         case BPF_ALU|BPF_MUL|BPF_X:
1028         case BPF_ALU|BPF_DIV|BPF_X:
1029         case BPF_ALU|BPF_MOD|BPF_X:
1030         case BPF_ALU|BPF_AND|BPF_X:
1031         case BPF_ALU|BPF_OR|BPF_X:
1032         case BPF_ALU|BPF_XOR|BPF_X:
1033         case BPF_ALU|BPF_LSH|BPF_X:
1034         case BPF_ALU|BPF_RSH|BPF_X:
1035                 op = BPF_OP(s->code);
1036                 if (alter && vmap[val[X_ATOM]].is_const) {
1037                         if (vmap[val[A_ATOM]].is_const) {
1038                                 fold_op(s, val[A_ATOM], val[X_ATOM]);
1039                                 val[A_ATOM] = K(s->k);
1040                         }
1041                         else {
1042                                 s->code = BPF_ALU|BPF_K|op;
1043                                 s->k = vmap[val[X_ATOM]].const_val;
1044                                 done = 0;
1045                                 val[A_ATOM] =
1046                                         F(s->code, val[A_ATOM], K(s->k));
1047                         }
1048                         break;
1049                 }
1050                 /*
1051                  * Check if we're doing something to an accumulator
1052                  * that is 0, and simplify.  This may not seem like
1053                  * much of a simplification but it could open up further
1054                  * optimizations.
1055                  * XXX We could also check for mul by 1, etc.
1056                  */
1057                 if (alter && vmap[val[A_ATOM]].is_const
1058                     && vmap[val[A_ATOM]].const_val == 0) {
1059                         if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1060                                 s->code = BPF_MISC|BPF_TXA;
1061                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1062                                 break;
1063                         }
1064                         else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1065                                  op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1066                                 s->code = BPF_LD|BPF_IMM;
1067                                 s->k = 0;
1068                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1069                                 break;
1070                         }
1071                         else if (op == BPF_NEG) {
1072                                 s->code = NOP;
1073                                 break;
1074                         }
1075                 }
1076                 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1077                 break;
1078
1079         case BPF_MISC|BPF_TXA:
1080                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1081                 break;
1082
1083         case BPF_LD|BPF_MEM:
1084                 v = val[s->k];
1085                 if (alter && vmap[v].is_const) {
1086                         s->code = BPF_LD|BPF_IMM;
1087                         s->k = vmap[v].const_val;
1088                         done = 0;
1089                 }
1090                 vstore(s, &val[A_ATOM], v, alter);
1091                 break;
1092
1093         case BPF_MISC|BPF_TAX:
1094                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1095                 break;
1096
1097         case BPF_LDX|BPF_MEM:
1098                 v = val[s->k];
1099                 if (alter && vmap[v].is_const) {
1100                         s->code = BPF_LDX|BPF_IMM;
1101                         s->k = vmap[v].const_val;
1102                         done = 0;
1103                 }
1104                 vstore(s, &val[X_ATOM], v, alter);
1105                 break;
1106
1107         case BPF_ST:
1108                 vstore(s, &val[s->k], val[A_ATOM], alter);
1109                 break;
1110
1111         case BPF_STX:
1112                 vstore(s, &val[s->k], val[X_ATOM], alter);
1113                 break;
1114         }
1115 }
1116
1117 static void
1118 deadstmt(register struct stmt *s, register struct stmt *last[])
1119 {
1120         register int atom;
1121
1122         atom = atomuse(s);
1123         if (atom >= 0) {
1124                 if (atom == AX_ATOM) {
1125                         last[X_ATOM] = 0;
1126                         last[A_ATOM] = 0;
1127                 }
1128                 else
1129                         last[atom] = 0;
1130         }
1131         atom = atomdef(s);
1132         if (atom >= 0) {
1133                 if (last[atom]) {
1134                         done = 0;
1135                         last[atom]->code = NOP;
1136                 }
1137                 last[atom] = s;
1138         }
1139 }
1140
1141 static void
1142 opt_deadstores(register struct block *b)
1143 {
1144         register struct slist *s;
1145         register int atom;
1146         struct stmt *last[N_ATOMS];
1147
1148         memset((char *)last, 0, sizeof last);
1149
1150         for (s = b->stmts; s != 0; s = s->next)
1151                 deadstmt(&s->s, last);
1152         deadstmt(&b->s, last);
1153
1154         for (atom = 0; atom < N_ATOMS; ++atom)
1155                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1156                         last[atom]->code = NOP;
1157                         done = 0;
1158                 }
1159 }
1160
1161 static void
1162 opt_blk(struct block *b, int do_stmts)
1163 {
1164         struct slist *s;
1165         struct edge *p;
1166         int i;
1167         bpf_int32 aval, xval;
1168
1169 #if 0
1170         for (s = b->stmts; s && s->next; s = s->next)
1171                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1172                         do_stmts = 0;
1173                         break;
1174                 }
1175 #endif
1176
1177         /*
1178          * Initialize the atom values.
1179          */
1180         p = b->in_edges;
1181         if (p == 0) {
1182                 /*
1183                  * We have no predecessors, so everything is undefined
1184                  * upon entry to this block.
1185                  */
1186                 memset((char *)b->val, 0, sizeof(b->val));
1187         } else {
1188                 /*
1189                  * Inherit values from our predecessors.
1190                  *
1191                  * First, get the values from the predecessor along the
1192                  * first edge leading to this node.
1193                  */
1194                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1195                 /*
1196                  * Now look at all the other nodes leading to this node.
1197                  * If, for the predecessor along that edge, a register
1198                  * has a different value from the one we have (i.e.,
1199                  * control paths are merging, and the merging paths
1200                  * assign different values to that register), give the
1201                  * register the undefined value of 0.
1202                  */
1203                 while ((p = p->next) != NULL) {
1204                         for (i = 0; i < N_ATOMS; ++i)
1205                                 if (b->val[i] != p->pred->val[i])
1206                                         b->val[i] = 0;
1207                 }
1208         }
1209         aval = b->val[A_ATOM];
1210         xval = b->val[X_ATOM];
1211         for (s = b->stmts; s; s = s->next)
1212                 opt_stmt(&s->s, b->val, do_stmts);
1213
1214         /*
1215          * This is a special case: if we don't use anything from this
1216          * block, and we load the accumulator or index register with a
1217          * value that is already there, or if this block is a return,
1218          * eliminate all the statements.
1219          *
1220          * XXX - what if it does a store?
1221          *
1222          * XXX - why does it matter whether we use anything from this
1223          * block?  If the accumulator or index register doesn't change
1224          * its value, isn't that OK even if we use that value?
1225          *
1226          * XXX - if we load the accumulator with a different value,
1227          * and the block ends with a conditional branch, we obviously
1228          * can't eliminate it, as the branch depends on that value.
1229          * For the index register, the conditional branch only depends
1230          * on the index register value if the test is against the index
1231          * register value rather than a constant; if nothing uses the
1232          * value we put into the index register, and we're not testing
1233          * against the index register's value, and there aren't any
1234          * other problems that would keep us from eliminating this
1235          * block, can we eliminate it?
1236          */
1237         if (do_stmts &&
1238             ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1239               xval != 0 && b->val[X_ATOM] == xval) ||
1240              BPF_CLASS(b->s.code) == BPF_RET)) {
1241                 if (b->stmts != 0) {
1242                         b->stmts = 0;
1243                         done = 0;
1244                 }
1245         } else {
1246                 opt_peep(b);
1247                 opt_deadstores(b);
1248         }
1249         /*
1250          * Set up values for branch optimizer.
1251          */
1252         if (BPF_SRC(b->s.code) == BPF_K)
1253                 b->oval = K(b->s.k);
1254         else
1255                 b->oval = b->val[X_ATOM];
1256         b->et.code = b->s.code;
1257         b->ef.code = -b->s.code;
1258 }
1259
1260 /*
1261  * Return true if any register that is used on exit from 'succ', has
1262  * an exit value that is different from the corresponding exit value
1263  * from 'b'.
1264  */
1265 static int
1266 use_conflict(struct block *b, struct block *succ)
1267 {
1268         int atom;
1269         atomset use = succ->out_use;
1270
1271         if (use == 0)
1272                 return 0;
1273
1274         for (atom = 0; atom < N_ATOMS; ++atom)
1275                 if (ATOMELEM(use, atom))
1276                         if (b->val[atom] != succ->val[atom])
1277                                 return 1;
1278         return 0;
1279 }
1280
1281 static struct block *
1282 fold_edge(struct block *child, struct edge *ep)
1283 {
1284         int sense;
1285         int aval0, aval1, oval0, oval1;
1286         int code = ep->code;
1287
1288         if (code < 0) {
1289                 code = -code;
1290                 sense = 0;
1291         } else
1292                 sense = 1;
1293
1294         if (child->s.code != code)
1295                 return 0;
1296
1297         aval0 = child->val[A_ATOM];
1298         oval0 = child->oval;
1299         aval1 = ep->pred->val[A_ATOM];
1300         oval1 = ep->pred->oval;
1301
1302         if (aval0 != aval1)
1303                 return 0;
1304
1305         if (oval0 == oval1)
1306                 /*
1307                  * The operands of the branch instructions are
1308                  * identical, so the result is true if a true
1309                  * branch was taken to get here, otherwise false.
1310                  */
1311                 return sense ? JT(child) : JF(child);
1312
1313         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1314                 /*
1315                  * At this point, we only know the comparison if we
1316                  * came down the true branch, and it was an equality
1317                  * comparison with a constant.
1318                  *
1319                  * I.e., if we came down the true branch, and the branch
1320                  * was an equality comparison with a constant, we know the
1321                  * accumulator contains that constant.  If we came down
1322                  * the false branch, or the comparison wasn't with a
1323                  * constant, we don't know what was in the accumulator.
1324                  *
1325                  * We rely on the fact that distinct constants have distinct
1326                  * value numbers.
1327                  */
1328                 return JF(child);
1329
1330         return 0;
1331 }
1332
1333 static void
1334 opt_j(struct edge *ep)
1335 {
1336         register int i, k;
1337         register struct block *target;
1338
1339         if (JT(ep->succ) == 0)
1340                 return;
1341
1342         if (JT(ep->succ) == JF(ep->succ)) {
1343                 /*
1344                  * Common branch targets can be eliminated, provided
1345                  * there is no data dependency.
1346                  */
1347                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1348                         done = 0;
1349                         ep->succ = JT(ep->succ);
1350                 }
1351         }
1352         /*
1353          * For each edge dominator that matches the successor of this
1354          * edge, promote the edge successor to the its grandchild.
1355          *
1356          * XXX We violate the set abstraction here in favor a reasonably
1357          * efficient loop.
1358          */
1359  top:
1360         for (i = 0; i < edgewords; ++i) {
1361                 register bpf_u_int32 x = ep->edom[i];
1362
1363                 while (x != 0) {
1364                         k = ffs(x) - 1;
1365                         x &=~ (1 << k);
1366                         k += i * BITS_PER_WORD;
1367
1368                         target = fold_edge(ep->succ, edges[k]);
1369                         /*
1370                          * Check that there is no data dependency between
1371                          * nodes that will be violated if we move the edge.
1372                          */
1373                         if (target != 0 && !use_conflict(ep->pred, target)) {
1374                                 done = 0;
1375                                 ep->succ = target;
1376                                 if (JT(target) != 0)
1377                                         /*
1378                                          * Start over unless we hit a leaf.
1379                                          */
1380                                         goto top;
1381                                 return;
1382                         }
1383                 }
1384         }
1385 }
1386
1387
1388 static void
1389 or_pullup(struct block *b)
1390 {
1391         int val, at_top;
1392         struct block *pull;
1393         struct block **diffp, **samep;
1394         struct edge *ep;
1395
1396         ep = b->in_edges;
1397         if (ep == 0)
1398                 return;
1399
1400         /*
1401          * Make sure each predecessor loads the same value.
1402          * XXX why?
1403          */
1404         val = ep->pred->val[A_ATOM];
1405         for (ep = ep->next; ep != 0; ep = ep->next)
1406                 if (val != ep->pred->val[A_ATOM])
1407                         return;
1408
1409         if (JT(b->in_edges->pred) == b)
1410                 diffp = &JT(b->in_edges->pred);
1411         else
1412                 diffp = &JF(b->in_edges->pred);
1413
1414         at_top = 1;
1415         while (1) {
1416                 if (*diffp == 0)
1417                         return;
1418
1419                 if (JT(*diffp) != JT(b))
1420                         return;
1421
1422                 if (!SET_MEMBER((*diffp)->dom, b->id))
1423                         return;
1424
1425                 if ((*diffp)->val[A_ATOM] != val)
1426                         break;
1427
1428                 diffp = &JF(*diffp);
1429                 at_top = 0;
1430         }
1431         samep = &JF(*diffp);
1432         while (1) {
1433                 if (*samep == 0)
1434                         return;
1435
1436                 if (JT(*samep) != JT(b))
1437                         return;
1438
1439                 if (!SET_MEMBER((*samep)->dom, b->id))
1440                         return;
1441
1442                 if ((*samep)->val[A_ATOM] == val)
1443                         break;
1444
1445                 /* XXX Need to check that there are no data dependencies
1446                    between dp0 and dp1.  Currently, the code generator
1447                    will not produce such dependencies. */
1448                 samep = &JF(*samep);
1449         }
1450 #ifdef notdef
1451         /* XXX This doesn't cover everything. */
1452         for (i = 0; i < N_ATOMS; ++i)
1453                 if ((*samep)->val[i] != pred->val[i])
1454                         return;
1455 #endif
1456         /* Pull up the node. */
1457         pull = *samep;
1458         *samep = JF(pull);
1459         JF(pull) = *diffp;
1460
1461         /*
1462          * At the top of the chain, each predecessor needs to point at the
1463          * pulled up node.  Inside the chain, there is only one predecessor
1464          * to worry about.
1465          */
1466         if (at_top) {
1467                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1468                         if (JT(ep->pred) == b)
1469                                 JT(ep->pred) = pull;
1470                         else
1471                                 JF(ep->pred) = pull;
1472                 }
1473         }
1474         else
1475                 *diffp = pull;
1476
1477         done = 0;
1478 }
1479
1480 static void
1481 and_pullup(struct block *b)
1482 {
1483         int val, at_top;
1484         struct block *pull;
1485         struct block **diffp, **samep;
1486         struct edge *ep;
1487
1488         ep = b->in_edges;
1489         if (ep == 0)
1490                 return;
1491
1492         /*
1493          * Make sure each predecessor loads the same value.
1494          */
1495         val = ep->pred->val[A_ATOM];
1496         for (ep = ep->next; ep != 0; ep = ep->next)
1497                 if (val != ep->pred->val[A_ATOM])
1498                         return;
1499
1500         if (JT(b->in_edges->pred) == b)
1501                 diffp = &JT(b->in_edges->pred);
1502         else
1503                 diffp = &JF(b->in_edges->pred);
1504
1505         at_top = 1;
1506         while (1) {
1507                 if (*diffp == 0)
1508                         return;
1509
1510                 if (JF(*diffp) != JF(b))
1511                         return;
1512
1513                 if (!SET_MEMBER((*diffp)->dom, b->id))
1514                         return;
1515
1516                 if ((*diffp)->val[A_ATOM] != val)
1517                         break;
1518
1519                 diffp = &JT(*diffp);
1520                 at_top = 0;
1521         }
1522         samep = &JT(*diffp);
1523         while (1) {
1524                 if (*samep == 0)
1525                         return;
1526
1527                 if (JF(*samep) != JF(b))
1528                         return;
1529
1530                 if (!SET_MEMBER((*samep)->dom, b->id))
1531                         return;
1532
1533                 if ((*samep)->val[A_ATOM] == val)
1534                         break;
1535
1536                 /* XXX Need to check that there are no data dependencies
1537                    between diffp and samep.  Currently, the code generator
1538                    will not produce such dependencies. */
1539                 samep = &JT(*samep);
1540         }
1541 #ifdef notdef
1542         /* XXX This doesn't cover everything. */
1543         for (i = 0; i < N_ATOMS; ++i)
1544                 if ((*samep)->val[i] != pred->val[i])
1545                         return;
1546 #endif
1547         /* Pull up the node. */
1548         pull = *samep;
1549         *samep = JT(pull);
1550         JT(pull) = *diffp;
1551
1552         /*
1553          * At the top of the chain, each predecessor needs to point at the
1554          * pulled up node.  Inside the chain, there is only one predecessor
1555          * to worry about.
1556          */
1557         if (at_top) {
1558                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1559                         if (JT(ep->pred) == b)
1560                                 JT(ep->pred) = pull;
1561                         else
1562                                 JF(ep->pred) = pull;
1563                 }
1564         }
1565         else
1566                 *diffp = pull;
1567
1568         done = 0;
1569 }
1570
1571 static void
1572 opt_blks(struct block *root, int do_stmts)
1573 {
1574         int i, maxlevel;
1575         struct block *p;
1576
1577         init_val();
1578         maxlevel = root->level;
1579
1580         find_inedges(root);
1581         for (i = maxlevel; i >= 0; --i)
1582                 for (p = levels[i]; p; p = p->link)
1583                         opt_blk(p, do_stmts);
1584
1585         if (do_stmts)
1586                 /*
1587                  * No point trying to move branches; it can't possibly
1588                  * make a difference at this point.
1589                  */
1590                 return;
1591
1592         for (i = 1; i <= maxlevel; ++i) {
1593                 for (p = levels[i]; p; p = p->link) {
1594                         opt_j(&p->et);
1595                         opt_j(&p->ef);
1596                 }
1597         }
1598
1599         find_inedges(root);
1600         for (i = 1; i <= maxlevel; ++i) {
1601                 for (p = levels[i]; p; p = p->link) {
1602                         or_pullup(p);
1603                         and_pullup(p);
1604                 }
1605         }
1606 }
1607
1608 static inline void
1609 link_inedge(struct edge *parent, struct block *child)
1610 {
1611         parent->next = child->in_edges;
1612         child->in_edges = parent;
1613 }
1614
1615 static void
1616 find_inedges(struct block *root)
1617 {
1618         int i;
1619         struct block *b;
1620
1621         for (i = 0; i < n_blocks; ++i)
1622                 blocks[i]->in_edges = 0;
1623
1624         /*
1625          * Traverse the graph, adding each edge to the predecessor
1626          * list of its successors.  Skip the leaves (i.e. level 0).
1627          */
1628         for (i = root->level; i > 0; --i) {
1629                 for (b = levels[i]; b != 0; b = b->link) {
1630                         link_inedge(&b->et, JT(b));
1631                         link_inedge(&b->ef, JF(b));
1632                 }
1633         }
1634 }
1635
1636 static void
1637 opt_root(struct block **b)
1638 {
1639         struct slist *tmp, *s;
1640
1641         s = (*b)->stmts;
1642         (*b)->stmts = 0;
1643         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1644                 *b = JT(*b);
1645
1646         tmp = (*b)->stmts;
1647         if (tmp != 0)
1648                 sappend(s, tmp);
1649         (*b)->stmts = s;
1650
1651         /*
1652          * If the root node is a return, then there is no
1653          * point executing any statements (since the bpf machine
1654          * has no side effects).
1655          */
1656         if (BPF_CLASS((*b)->s.code) == BPF_RET)
1657                 (*b)->stmts = 0;
1658 }
1659
1660 static void
1661 opt_loop(struct block *root, int do_stmts)
1662 {
1663
1664 #ifdef BDEBUG
1665         if (dflag > 1) {
1666                 printf("opt_loop(root, %d) begin\n", do_stmts);
1667                 opt_dump(root);
1668         }
1669 #endif
1670         do {
1671                 done = 1;
1672                 find_levels(root);
1673                 find_dom(root);
1674                 find_closure(root);
1675                 find_ud(root);
1676                 find_edom(root);
1677                 opt_blks(root, do_stmts);
1678 #ifdef BDEBUG
1679                 if (dflag > 1) {
1680                         printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1681                         opt_dump(root);
1682                 }
1683 #endif
1684         } while (!done);
1685 }
1686
/*
 * Optimize the filter code in its dag representation.
 *
 * 'rootp' points at the root block of the flow graph.  opt_root() is
 * given 'rootp' itself and may replace '*rootp' with a different
 * block, so the caller must use '*rootp' afterwards.  The optimizer's
 * working storage is released by opt_cleanup() before returning.
 */
void
bpf_optimize(struct block **rootp)
{
        struct block *root;

        root = *rootp;

        opt_init(root);
        /* Two passes: first with do_stmts off, then with it on. */
        opt_loop(root, 0);
        opt_loop(root, 1);
        intern_blocks(root);
#ifdef BDEBUG
        if (dflag > 1) {
                printf("after intern_blocks()\n");
                opt_dump(root);
        }
#endif
        opt_root(rootp);
#ifdef BDEBUG
        if (dflag > 1) {
                printf("after opt_root()\n");
                /*
                 * opt_root() may have advanced *rootp, so dump the
                 * graph from the current root rather than the stale
                 * local copy taken on entry.
                 */
                opt_dump(*rootp);
        }
#endif
        opt_cleanup();
}
1716
1717 static void
1718 make_marks(struct block *p)
1719 {
1720         if (!isMarked(p)) {
1721                 Mark(p);
1722                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1723                         make_marks(JT(p));
1724                         make_marks(JF(p));
1725                 }
1726         }
1727 }
1728
1729 /*
1730  * Mark code array such that isMarked(i) is true
1731  * only for nodes that are alive.
1732  */
1733 static void
1734 mark_code(struct block *p)
1735 {
1736         cur_mark += 1;
1737         make_marks(p);
1738 }
1739
1740 /*
1741  * True iff the two stmt lists load the same value from the packet into
1742  * the accumulator.
1743  */
1744 static int
1745 eq_slist(struct slist *x, struct slist *y)
1746 {
1747         while (1) {
1748                 while (x && x->s.code == NOP)
1749                         x = x->next;
1750                 while (y && y->s.code == NOP)
1751                         y = y->next;
1752                 if (x == 0)
1753                         return y == 0;
1754                 if (y == 0)
1755                         return x == 0;
1756                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1757                         return 0;
1758                 x = x->next;
1759                 y = y->next;
1760         }
1761 }
1762
1763 static inline int
1764 eq_blk(struct block *b0, struct block *b1)
1765 {
1766         if (b0->s.code == b1->s.code &&
1767             b0->s.k == b1->s.k &&
1768             b0->et.succ == b1->et.succ &&
1769             b0->ef.succ == b1->ef.succ)
1770                 return eq_slist(b0->stmts, b1->stmts);
1771         return 0;
1772 }
1773
1774 static void
1775 intern_blocks(struct block *root)
1776 {
1777         struct block *p;
1778         int i, j;
1779         int done1; /* don't shadow global */
1780  top:
1781         done1 = 1;
1782         for (i = 0; i < n_blocks; ++i)
1783                 blocks[i]->link = 0;
1784
1785         mark_code(root);
1786
1787         for (i = n_blocks - 1; --i >= 0; ) {
1788                 if (!isMarked(blocks[i]))
1789                         continue;
1790                 for (j = i + 1; j < n_blocks; ++j) {
1791                         if (!isMarked(blocks[j]))
1792                                 continue;
1793                         if (eq_blk(blocks[i], blocks[j])) {
1794                                 blocks[i]->link = blocks[j]->link ?
1795                                         blocks[j]->link : blocks[j];
1796                                 break;
1797                         }
1798                 }
1799         }
1800         for (i = 0; i < n_blocks; ++i) {
1801                 p = blocks[i];
1802                 if (JT(p) == 0)
1803                         continue;
1804                 if (JT(p)->link) {
1805                         done1 = 0;
1806                         JT(p) = JT(p)->link;
1807                 }
1808                 if (JF(p)->link) {
1809                         done1 = 0;
1810                         JF(p) = JF(p)->link;
1811                 }
1812         }
1813         if (!done1)
1814                 goto top;
1815 }
1816
1817 static void
1818 opt_cleanup(void)
1819 {
1820         free(blocks);           /* block number -> block array */
1821         free(levels);
1822         free(space);            /* shared storage for the bit sets */
1823         free(edges);
1824         free(vmap);
1825         free(vnode_base);
1826 }
1827
1828 /*
1829  * Count the statements on list 's' whose code is not NOP.
1830  */
1831 static u_int
1832 slength(struct slist *s)
1833 {
1834         u_int live = 0;
1835         while (s != NULL) {
1836                 live += (s->s.code != NOP);
1837                 s = s->next;
1838         }
1839         return live;
1840 }
1841
1842 /*
1843  * Count the not-yet-marked blocks reachable from 'p', marking each.
1844  */
1845 static int
1846 count_blocks(struct block *p)
1847 {
1848         if (p != 0 && !isMarked(p)) {
1849                 Mark(p);
1850                 return 1 + count_blocks(JT(p)) + count_blocks(JF(p));
1851         }
1852         return 0;       /* null successor, or block already counted */
1853 }
1854
1855 /*
1856  * Walk the flow graph depth first from 'p'.  Each block seen for the
1857  * first time is marked, takes the current value of n_blocks as its id,
1858  * and is entered into the 'blocks' array at that index.
1859  */
1860 static void
1861 number_blks_r(struct block *p)
1862 {
1863         if (p == 0 || isMarked(p))
1864                 return;
1865         Mark(p);
1866
1867         /* Claim the next slot before descending into the successors. */
1868         p->id = n_blocks;
1869         blocks[n_blocks] = p;
1870         n_blocks++;
1871
1872         number_blks_r(JT(p));           /* true branch first... */
1873         number_blks_r(JF(p));           /* ...then the false branch */
1874 }
1875
1876 /*
1877  * Return the number of BPF instructions needed for the flowgraph
1878  * reachable from 'p'.  Blocks are marked as they are visited, so all
1879  * of them must be unmarked before the first call.
1880  *
1881  * Every block visited contributes
1882  *
1883  *      its non-NOP statements (slength(p->stmts));
1884  *
1885  *      the instruction that ends the block (1);
1886  *
1887  *      an extra long jump if the true branch requires it (p->longjt);
1888  *
1889  *      an extra long jump if the false branch requires it (p->longjf);
1890  *
1891  * plus the totals of count_stmts(JT(p)) and count_stmts(JF(p)).
1892  */
1893 static u_int
1894 count_stmts(struct block *p)
1895 {
1896         u_int own, below;
1897
1898         if (p == 0 || isMarked(p))
1899                 return 0;
1900         Mark(p);
1901         below = count_stmts(JT(p)) + count_stmts(JF(p));
1902         own = slength(p->stmts) + 1 + p->longjt + p->longjf;
1903         return own + below;
1904 }
1905
1906 /*
1907  * Allocate every table the optimizer needs, before optimization begins.
1908  * Each size is a linear bound computed from the number of blocks and
1909  * statements reachable from 'root'.  Failures go through bpf_error().
1910  */
1911 static void
1912 opt_init(struct block *root)
1913 {
1914         bpf_u_int32 *p;
1915         int i, n, max_stmts;
1916         size_t node_cnt, edge_cnt;      /* bit-set area sizes, in words */
1917
1918         /* Count the blocks, then enter them into the number->block array. */
1919         unMarkAll();
1920         n = count_blocks(root);
1921         blocks = (struct block **)calloc(n, sizeof(*blocks));
1922         if (blocks == NULL)
1923                 bpf_error("malloc");
1924         unMarkAll();
1925         n_blocks = 0;
1926         number_blks_r(root);
1927
1928         n_edges = 2 * n_blocks;         /* a true and a false edge each */
1929         edges = (struct edge **)calloc(n_edges, sizeof(*edges));
1930         if (edges == NULL)
1931                 bpf_error("malloc");
1932
1933         /* The number of levels is bounded by the number of nodes. */
1934         levels = (struct block **)calloc(n_blocks, sizeof(*levels));
1935         if (levels == NULL)
1936                 bpf_error("malloc");
1937
1938         edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1939         nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1940
1941         /*
1942          * One area holds two node sets per block plus one edge set per
1943          * edge.  Size it in size_t and refuse a request that would wrap;
1944          * the previous int arithmetic could overflow on huge filters.
1945          */
1946         node_cnt = (size_t)2 * n_blocks * nodewords;
1947         edge_cnt = (size_t)n_edges * edgewords;
1948         if (node_cnt / nodewords != (size_t)2 * n_blocks ||
1949             edge_cnt / edgewords != (size_t)n_edges ||
1950             node_cnt + edge_cnt < node_cnt ||
1951             node_cnt + edge_cnt > (size_t)-1 / sizeof(*space))
1952                 bpf_error("filter is too complex to optimize");
1953         space = (bpf_u_int32 *)malloc((node_cnt + edge_cnt) * sizeof(*space));
1954         if (space == NULL)
1955                 bpf_error("malloc");
1956         p = space;
1957         all_dom_sets = p;
1958         for (i = 0; i < n_blocks; ++i) {
1959                 blocks[i]->dom = p;
1960                 p += nodewords;
1961         }
1962         all_closure_sets = p;
1963         for (i = 0; i < n_blocks; ++i) {
1964                 blocks[i]->closure = p;
1965                 p += nodewords;
1966         }
1967         all_edge_sets = p;
1968         max_stmts = 0;
1969         for (i = 0; i < n_blocks; ++i) {
1970                 struct block *b = blocks[i];
1971                 b->et.edom = p;
1972                 p += edgewords;
1973                 b->ef.edom = p;
1974                 p += edgewords;
1975                 b->et.id = i;
1976                 edges[i] = &b->et;
1977                 b->ef.id = n_blocks + i;
1978                 edges[n_blocks + i] = &b->ef;
1979                 b->et.pred = b;
1980                 b->ef.pred = b;
1981                 max_stmts += slength(b->stmts) + 1;
1982         }
1983         /* At most 3 value numbers per statement, hence this upper bound. */
1984         maxval = 3 * max_stmts;
1985         vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
1986         vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
1987         if (vmap == NULL || vnode_base == NULL)
1988                 bpf_error("malloc");
1989 }
1990
1991 /*
1992  * Pointers used while converting the basic block form of the code
1993  * into the array form that BPF requires.  'fstart' points to the start
1994  * of the malloc'd array; 'ftail' is moved down by the recursive traversal.
1995  */
1996 static struct bpf_insn *fstart;
1997 static struct bpf_insn *ftail;
1998
1999 #ifdef BDEBUG
2000 int bids[1000];         /* instruction index -> block id + 1 (debug only) */
2001 #endif
2002
2003 /*
2004  * Resolve the block-local conditional jump 'src', statement number
2005  * 'off' of its block, into the relative offsets dst->jt and dst->jf.
2006  * offset[] lists the block's first 'slen' statements.  On a malformed
2007  * jump offset[] is freed before the error is reported, because control
2008  * does not come back from bpf_error() (hence the NOTREACHED markers).
2009  */
2010 static void
2011 fill_local_jump(struct bpf_insn *dst, struct slist *src,
2012     struct slist **offset, u_int slen, u_int off)
2013 {
2014         static const char ljerr[] = "%s for block-local relative jump: off=%d";
2015         u_int i;
2016         int jt = 0, jf = 0;     /* matches found for each target */
2017
2018         if (!src->s.jt || !src->s.jf) {
2019                 free(offset);
2020                 bpf_error(ljerr, "no jmp destination", off);
2021                 /*NOTREACHED*/
2022         }
2023
2024         for (i = 0; i < slen; i++) {
2025                 if (offset[i] == src->s.jt) {
2026                         if (jt) {
2027                                 free(offset);
2028                                 bpf_error(ljerr, "multiple matches", off);
2029                                 /*NOTREACHED*/
2030                         }
2031                         dst->jt = i - off - 1;
2032                         jt++;
2033                 }
2034                 if (offset[i] == src->s.jf) {
2035                         if (jf) {
2036                                 free(offset);
2037                                 bpf_error(ljerr, "multiple matches", off);
2038                                 /*NOTREACHED*/
2039                         }
2040                         dst->jf = i - off - 1;
2041                         jf++;
2042                 }
2043         }
2044         if (!jt || !jf) {
2045                 free(offset);
2046                 bpf_error(ljerr, "no destination found", off);
2047                 /*NOTREACHED*/
2048         }
2049 }
2050
2051 /*
2052  * Emit the instructions for 'p' and everything reachable from it into
2053  * the array being built.  Successors are converted first and the array
2054  * is filled from the end, 'ftail' moving down towards 'fstart', so that
2055  * a block's branch offsets can be computed from its successors' offsets.
2056  *
2057  * Returns true if successful.  Returns false if a branch has
2058  * an offset that is too large.  If so, we have marked that
2059  * branch so that on a subsequent iteration, it will be treated
2060  * properly.
2061  */
2062 static int
2063 convert_code_r(struct block *p)
2064 {
2065         struct bpf_insn *dst;
2066         struct slist *src;
2067         u_int slen;
2068         u_int off;
2069         int extrajmps;          /* number of extra jumps inserted */
2070         struct slist **offset = NULL;
2071
2072         if (p == 0 || isMarked(p))
2073                 return (1);
2074         Mark(p);
2075
2076         /* Lay out both successors first so their offsets are known. */
2077         if (convert_code_r(JF(p)) == 0)
2078                 return (0);
2079         if (convert_code_r(JT(p)) == 0)
2080                 return (0);
2081
2082         /* Reserve this block's slots, including any extra long jumps. */
2083         slen = slength(p->stmts);
2084         dst = ftail -= (slen + 1 + p->longjt + p->longjf);
2085         p->offset = dst - fstart;
2086
2087         /* generate offset[] for convenience  */
2088         if (slen) {
2089                 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2090                 if (!offset) {
2091                         bpf_error("not enough core");
2092                         /*NOTREACHED*/
2093                 }
2094         }
2095         src = p->stmts;
2096         for (off = 0; off < slen && src; off++) {
2097                 offset[off] = src;
2098                 src = src->next;
2099         }
2100
2101         off = 0;
2102         for (src = p->stmts; src; src = src->next) {
2103                 if (src->s.code == NOP)
2104                         continue;
2105                 dst->code = (u_short)src->s.code;
2106                 dst->k = src->s.k;
2107
2108                 /* Only conditional jumps need block-local targets filled. */
2109                 if (BPF_CLASS(src->s.code) == BPF_JMP &&
2110                     src->s.code != (BPF_JMP|BPF_JA) &&
2111                     off != slen - 2)    /*???*/
2112                         fill_local_jump(dst, src, offset, slen, off);
2113                 ++dst;
2114                 ++off;
2115         }
2116         free(offset);
2117
2118 #ifdef BDEBUG
2119         /* bids[] has a fixed size; skip the note rather than overrun it. */
2120         if ((size_t)(dst - fstart) < sizeof(bids) / sizeof(bids[0]))
2121                 bids[dst - fstart] = p->id + 1;
2122 #endif
2123         dst->code = (u_short)p->s.code;
2124         dst->k = p->s.k;
2125         if (JT(p)) {
2126                 extrajmps = 0;
2127                 off = JT(p)->offset - (p->offset + slen) - 1;
2128                 if (off >= 256) {
2129                         /* offset too large for branch, must add a jump */
2130                         if (p->longjt == 0) {
2131                                 /* mark this instruction and retry */
2132                                 p->longjt++;
2133                                 return (0);
2134                         }
2135                         /* branch if T to following jump */
2136                         dst->jt = extrajmps;
2137                         extrajmps++;
2138                         dst[extrajmps].code = BPF_JMP|BPF_JA;
2139                         dst[extrajmps].k = off - extrajmps;
2140                 } else
2141                         dst->jt = off;
2142                 off = JF(p)->offset - (p->offset + slen) - 1;
2143                 if (off >= 256) {
2144                         /* offset too large for branch, must add a jump */
2145                         if (p->longjf == 0) {
2146                                 /* mark this instruction and retry */
2147                                 p->longjf++;
2148                                 return (0);
2149                         }
2150                         /* branch if F to following jump */
2151                         /* if two jumps are inserted, F goes to second one */
2152                         dst->jf = extrajmps;
2153                         extrajmps++;
2154                         dst[extrajmps].code = BPF_JMP|BPF_JA;
2155                         dst[extrajmps].k = off - extrajmps;
2156                 } else
2157                         dst->jf = off;
2158         }
2159         return (1);
2160 }
2161
2162
2163 /*
2164  * Turn the flowgraph rooted at 'root' into a flat array of BPF
2165  * instructions and store the instruction count in *lenp.
2166  *
2167  * The array returned is malloc'd and is handed to the caller for use
2168  * in a bpf_program structure; it therefore has to remain valid after
2169  * this routine returns, and this routine must not free it.
2170  *
2171  * If the allocation below is reported as a leak, the leak is not in
2172  * this routine: the program that called pcap_compile() has to release
2173  * the filter program, with pcap_freecode(), once it is done with it.
2174  * (In the same way, a FILE that is fopen()ed and never fclose()d is
2175  * leaked by the program that opened it, not by fopen().)  See the
2176  * pcap man page.
2177  *
2178  * The only arrays freed here are those of conversion attempts that
2179  * convert_code_r() rejected; each retry starts with a fresh array.
2180  */
2181 struct bpf_insn *
2182 icode_to_fcode(struct block *root, u_int *lenp)
2183 {
2184         u_int n;
2185         struct bpf_insn *fp;
2186         int converted;
2187
2188         /*
2189          * Keep converting until convert_code_r() reports that no
2190          * branch is left with a too-large offset.
2191          */
2192         do {
2193                 /* Size the array for the current set of long jumps. */
2194                 unMarkAll();
2195                 n = *lenp = count_stmts(root);
2196                 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2197                 if (fp == NULL)
2198                         bpf_error("malloc");
2199                 memset((char *)fp, 0, sizeof(*fp) * n);
2200                 fstart = fp;
2201                 ftail = fp + n;         /* filled from the end down */
2202
2203                 unMarkAll();
2204                 converted = convert_code_r(root);
2205                 if (!converted)
2206                         free(fp);
2207         } while (!converted);
2208         return fp;
2209 }
2210
2211 /*
2212  * Make a copy of a BPF program and put it in the "fcode" member of
2213  * a "pcap_t", replacing any program installed there before.
2214  *
2215  * Returns 0 on success.  Returns -1, with a message in the "errbuf"
2216  * member, if the program fails bpf_validate() or if memory for the
2217  * copy cannot be allocated; in the latter case no program is installed.
2218  */
2219 int
2220 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2221 {
2222         struct bpf_insn *copy;
2223         size_t prog_size;
2224
2225         /* Reject an invalid program before disturbing the current one. */
2226         if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2227                 snprintf(p->errbuf, sizeof(p->errbuf),
2228                         "BPF program is not valid");
2229                 return (-1);
2230         }
2231
2232         /* Free up any already installed program. */
2233         pcap_freecode(&p->fcode);
2234
2235         prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2236         copy = (struct bpf_insn *)malloc(prog_size);
2237         if (copy == NULL) {
2238                 snprintf(p->errbuf, sizeof(p->errbuf),
2239                          "malloc: %s", pcap_strerror(errno));
2240                 p->fcode.bf_len = 0;    /* no length without instructions */
2241                 p->fcode.bf_insns = NULL;
2242                 return (-1);
2243         }
2244         memcpy(copy, fp->bf_insns, prog_size);
2245         p->fcode.bf_insns = copy;
2246         p->fcode.bf_len = fp->bf_len;
2247         return (0);
2248 }
2249
2250 #ifdef BDEBUG
2251 static void
2252 dot_dump_node(struct block *block, struct bpf_program *prog, FILE *out)
2253 {
2254         int ninsns, end, idx;
2255
2256         if (block == NULL || isMarked(block))
2257                 return;
2258         Mark(block);
2259
2260         /* Instructions owned by this block, clamped to the program end. */
2261         ninsns = slength(block->stmts) + 1 + block->longjt + block->longjf;
2262         end = min(block->offset + ninsns, (int)prog->bf_len);
2263
2264         fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n",
2265             block->id, block->id, block->id);
2266         for (idx = block->offset; idx < end; idx++)
2267                 fprintf(out, "\\n%s", bpf_image(&prog->bf_insns[idx], idx));
2268         /* The tooltip lists every nonzero val[] slot, then A and X. */
2269         fprintf(out, "\" tooltip=\"");
2270         for (idx = 0; idx < BPF_MEMWORDS; idx++)
2271                 if (block->val[idx] != 0)
2272                         fprintf(out, "val[%d]=%d ", idx, block->val[idx]);
2273         fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2274         fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2275         fprintf(out, "\"");
2276         if (JT(block) == NULL)
2277                 fprintf(out, ", peripheries=2");
2278         fprintf(out, "];\n");
2279         dot_dump_node(JT(block), prog, out);
2280         dot_dump_node(JF(block), prog, out);
2281 }
2282 static void
2283 dot_dump_edge(struct block *block, FILE *out)
2284 {
2285         struct block *succ_t;
2286
2287         if (block == NULL || isMarked(block))
2288                 return;
2289         Mark(block);
2290         succ_t = JT(block);     /* no true successor: no edges to print */
2291         if (succ_t != NULL) {
2292                 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n", block->id, succ_t->id);
2293                 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n", block->id, JF(block)->id);
2294         }
2295         dot_dump_edge(succ_t, out);
2296         dot_dump_edge(JF(block), out);
2297 }
2298 /* Write the block CFG to stdout in the graphviz/DOT language.
2299  * The graph shows each block's code, the value index of each register
2300  * at EXIT, and the jump relationships between the blocks.
2301  *
2302  * example DOT for BPF `ip src host 1.1.1.1' is:
2303     digraph BPF {
2304         block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2  jf 5" tooltip="val[A]=0 val[X]=0"];
2305         block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4  jf 5" tooltip="val[A]=0 val[X]=0"];
2306         block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2307         block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2308         "block0":se -> "block1":n [label="T"];
2309         "block0":sw -> "block3":n [label="F"];
2310         "block1":se -> "block2":n [label="T"];
2311         "block1":sw -> "block3":n [label="F"];
2312     }
2313  *
2314  *  After installing graphviz from http://www.graphviz.org/, save the
2315  *  output as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
2316  */
2317 static void
2318 dot_dump(struct block *root)
2319 {
2320         struct bpf_program prog;
2321         FILE *dot = stdout;
2322
2323         /* Convert first: the node pass prints the generated code. */
2324         memset(bids, 0, sizeof(bids));
2325         prog.bf_insns = icode_to_fcode(root, &prog.bf_len);
2326
2327         fprintf(dot, "digraph BPF {\n");
2328         unMarkAll();
2329         dot_dump_node(root, &prog, dot);
2330         unMarkAll();            /* the edge pass marks blocks afresh */
2331         dot_dump_edge(root, dot);
2332         fprintf(dot, "}\n");
2333         free(prog.bf_insns);
2334 }
2335
2336 static void
2337 plain_dump(struct block *root)
2338 {
2339         struct bpf_program prog;
2340
2341         memset(bids, 0, sizeof(bids));
2342         prog.bf_insns = icode_to_fcode(root, &prog.bf_len);
2343         bpf_dump(&prog, 1);     /* print the converted program... */
2344         putchar('\n');          /* ...then a newline */
2345         free(prog.bf_insns);
2346 }
2347 static void
2348 opt_dump(struct block *root)
2349 {
2350         /* dflag >= 4 (-dddd, extending tcpdump's -d/-dd/-ddd levels)
2351          * selects the DOT graph; anything lower gets the plain listing.
2352          */
2353         if (dflag <= 3) {
2354                 plain_dump(root);
2355                 return;
2356         }
2357         dot_dump(root);
2358 }
2359
2360 #endif