[apple/network_cmds.git] / pcap / optimize.c
1 /*
2 * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
7 * Reserved. This file contains Original Code and/or Modifications of
8 * Original Code as defined in and that are subject to the Apple Public
9 * Source License Version 1.0 (the 'License'). You may not use this file
10 * except in compliance with the License. Please obtain a copy of the
11 * License at http://www.apple.com/publicsource and read it before using
12 * this file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
19 * License for the specific language governing rights and limitations
20 * under the License."
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24 /* $OpenBSD: optimize.c,v 1.5 1996/09/16 02:33:07 tholo Exp $ */
25
26 /*
27 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
28 * The Regents of the University of California. All rights reserved.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that: (1) source code distributions
32 * retain the above copyright notice and this paragraph in its entirety, (2)
33 * distributions including binary code include the above copyright notice and
34 * this paragraph in its entirety in the documentation or other materials
35 * provided with the distribution, and (3) all advertising materials mentioning
36 * features or use of this software display the following acknowledgement:
37 * ``This product includes software developed by the University of California,
38 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
39 * the University nor the names of its contributors may be used to endorse
40 * or promote products derived from this software without specific prior
41 * written permission.
42 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
43 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
44 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
45 *
46 * Optimization module for tcpdump intermediate representation.
47 */
48 #ifndef lint
49 static char rcsid[] =
50 "@(#) Header: optimize.c,v 1.58 96/06/16 22:36:59 leres Exp (LBL)";
51 #endif
52
53 #include <sys/types.h>
54 #include <sys/time.h>
55
56 #include <net/bpf.h>
57
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <memory.h>
61
62 #ifdef HAVE_OS_PROTO_H
63 #include "os-proto.h"
64 #endif
65
66 #include "pcap-int.h"
67 #include "gencode.h"
68
69 #ifdef BDEBUG
70 extern int dflag;
71 #endif
72
73 #define A_ATOM BPF_MEMWORDS
74 #define X_ATOM (BPF_MEMWORDS+1)
75
76 #define NOP -1
77
78 /*
79 * This define is used to represent *both* the accumulator and
80 * x register in use-def computations.
81 * Currently, the use-def code assumes only one definition per instruction.
82 */
83 #define AX_ATOM N_ATOMS
84
85 /*
86 * A flag to indicate that further optimization is needed.
87 * Iterative passes are continued until a given pass yields no
88 * branch movement.
89 */
90 static int done;
91
92 /*
93 * A block is marked if and only if its mark equals the current mark.
94 * Rather than traverse the code array, marking each item, 'cur_mark' is
95 * incremented. This automatically makes each element unmarked.
96 */
97 static int cur_mark;
98 #define isMarked(p) ((p)->mark == cur_mark)
99 #define unMarkAll() cur_mark += 1
100 #define Mark(p) ((p)->mark = cur_mark)
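/*
 * For illustration: if cur_mark is 7, unMarkAll() bumps it to 8 and
 * every block whose 'mark' field still holds 7 (or any older value)
 * is instantly considered unmarked, without touching the blocks
 * themselves; Mark(p) then stamps p with the current value, 8.
 */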
101
102 static void opt_init(struct block *);
103 static void opt_cleanup(void);
104
105 static void make_marks(struct block *);
106 static void mark_code(struct block *);
107
108 static void intern_blocks(struct block *);
109
110 static int eq_slist(struct slist *, struct slist *);
111
112 static void find_levels_r(struct block *);
113
114 static void find_levels(struct block *);
115 static void find_dom(struct block *);
116 static void propedom(struct edge *);
117 static void find_edom(struct block *);
118 static void find_closure(struct block *);
119 static int atomuse(struct stmt *);
120 static int atomdef(struct stmt *);
121 static void compute_local_ud(struct block *);
122 static void find_ud(struct block *);
123 static void init_val(void);
124 static int F(int, int, int);
125 static __inline void vstore(struct stmt *, int *, int, int);
126 static void opt_blk(struct block *, int);
127 static int use_conflict(struct block *, struct block *);
128 static void opt_j(struct edge *);
129 static void or_pullup(struct block *);
130 static void and_pullup(struct block *);
131 static void opt_blks(struct block *, int);
132 static __inline void link_inedge(struct edge *, struct block *);
133 static void find_inedges(struct block *);
134 static void opt_root(struct block **);
135 static void opt_loop(struct block *, int);
136 static void fold_op(struct stmt *, int, int);
137 static __inline struct slist *this_op(struct slist *);
138 static void opt_not(struct block *);
139 static void opt_peep(struct block *);
140 static void opt_stmt(struct stmt *, int[], int);
141 static void deadstmt(struct stmt *, struct stmt *[]);
142 static void opt_deadstores(struct block *);
143 static void opt_blk(struct block *, int);
144 static int use_conflict(struct block *, struct block *);
145 static void opt_j(struct edge *);
146 static struct block *fold_edge(struct block *, struct edge *);
147 static __inline int eq_blk(struct block *, struct block *);
148 static int slength(struct slist *);
149 static int count_blocks(struct block *);
150 static void number_blks_r(struct block *);
151 static int count_stmts(struct block *);
152 static int convert_code_r(struct block *);
153 #ifdef BDEBUG
154 static void opt_dump(struct block *);
155 #endif
156
157 static int n_blocks;
158 struct block **blocks;
159 static int n_edges;
160 struct edge **edges;
161
162 /*
163 * A bit vector set representation of the dominators.
164 * We round up the set size to the next power of two.
165 */
166 static int nodewords;
167 static int edgewords;
168 struct block **levels;
169 bpf_u_int32 *space;
170 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
171 /*
172 * True if 'a' is in the uset 'p'.
173 */
174 #define SET_MEMBER(p, a) \
175 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
176
177 /*
178 * Add 'a' to uset p.
179 */
180 #define SET_INSERT(p, a) \
181 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
182
183 /*
184 * Delete 'a' from uset p.
185 */
186 #define SET_DELETE(p, a) \
187 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
188
189 /*
190 * a := a intersect b
191 */
192 #define SET_INTERSECT(a, b, n)\
193 {\
194 register bpf_u_int32 *_x = a, *_y = b;\
195 register int _n = n;\
196 while (--_n >= 0) *_x++ &= *_y++;\
197 }
198
199 /*
200 * a := a - b
201 */
202 #define SET_SUBTRACT(a, b, n)\
203 {\
204 register bpf_u_int32 *_x = a, *_y = b;\
205 register int _n = n;\
206 while (--_n >= 0) *_x++ &=~ *_y++;\
207 }
208
209 /*
210 * a := a union b
211 */
212 #define SET_UNION(a, b, n)\
213 {\
214 register bpf_u_int32 *_x = a, *_y = b;\
215 register int _n = n;\
216 while (--_n >= 0) *_x++ |= *_y++;\
217 }
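/*
 * A worked example of the uset layout, assuming 32-bit bpf_u_int32
 * (BITS_PER_WORD == 32): block id 37 lives in word 37/32 == 1,
 * bit 37%32 == 5, so
 *
 *	SET_INSERT(p, 37)	does	p[1] |= (1 << 5)
 *	SET_MEMBER(p, 37)	tests	p[1] &  (1 << 5)
 *
 * and SET_INTERSECT/SET_UNION/SET_SUBTRACT simply combine the two
 * arrays word by word.
 */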
218
219 static uset all_dom_sets;
220 static uset all_closure_sets;
221 static uset all_edge_sets;
222
223 #ifndef MAX
224 #define MAX(a,b) ((a)>(b)?(a):(b))
225 #endif
226
227 static void
228 find_levels_r(b)
229 struct block *b;
230 {
231 int level;
232
233 if (isMarked(b))
234 return;
235
236 Mark(b);
237 b->link = 0;
238
239 if (JT(b)) {
240 find_levels_r(JT(b));
241 find_levels_r(JF(b));
242 level = MAX(JT(b)->level, JF(b)->level) + 1;
243 } else
244 level = 0;
245 b->level = level;
246 b->link = levels[level];
247 levels[level] = b;
248 }
249
250 /*
251 * Level graph. The levels go from 0 at the leaves to
252 * N_LEVELS at the root. The levels[] array points to the
253 * first node of the level list, whose elements are linked
254 * with the 'link' field of the struct block.
255 */
256 static void
257 find_levels(root)
258 struct block *root;
259 {
260 memset((char *)levels, 0, n_blocks * sizeof(*levels));
261 unMarkAll();
262 find_levels_r(root);
263 }
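/*
 * For example, the return leaves always end up at level 0, a test
 * both of whose successors are leaves at level 1, and so on up to
 * the root, which carries the largest level; blocks sharing a level
 * are chained through their 'link' fields off the corresponding
 * levels[] entry.
 */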
264
265 /*
266 * Find dominator relationships.
267 * Assumes graph has been leveled.
268 */
269 static void
270 find_dom(root)
271 struct block *root;
272 {
273 int i;
274 struct block *b;
275 bpf_u_int32 *x;
276
277 /*
278 * Initialize sets to contain all nodes.
279 */
280 x = all_dom_sets;
281 i = n_blocks * nodewords;
282 while (--i >= 0)
283 *x++ = ~0;
284 /* Root starts off empty. */
285 for (i = nodewords; --i >= 0;)
286 root->dom[i] = 0;
287
288 /* root->level is the highest level number found. */
289 for (i = root->level; i >= 0; --i) {
290 for (b = levels[i]; b; b = b->link) {
291 SET_INSERT(b->dom, b->id);
292 if (JT(b) == 0)
293 continue;
294 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
295 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
296 }
297 }
298 }
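/*
 * Sketch of the computation on a diamond-shaped subgraph (the root's
 * two out-edges go to b1 and b2, and all of their out-edges go to
 * b3): every dom set starts out as "all blocks", the root's is
 * cleared and then holds just itself, b1 and b2 each end up with
 * {root, self}, and the two intersections arriving at b3 leave it
 * with {root, b3} only, since neither b1 nor b2 lies on every path
 * to b3.
 */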
299
300 static void
301 propedom(ep)
302 struct edge *ep;
303 {
304 SET_INSERT(ep->edom, ep->id);
305 if (ep->succ) {
306 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
307 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
308 }
309 }
310
311 /*
312 * Compute edge dominators.
313 * Assumes graph has been leveled and predecessors established.
314 */
315 static void
316 find_edom(root)
317 struct block *root;
318 {
319 int i;
320 uset x;
321 struct block *b;
322
323 x = all_edge_sets;
324 for (i = n_edges * edgewords; --i >= 0; )
325 x[i] = ~0;
326
327 /* root->level is the highest level number found. */
328 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
329 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
330 for (i = root->level; i >= 0; --i) {
331 for (b = levels[i]; b != 0; b = b->link) {
332 propedom(&b->et);
333 propedom(&b->ef);
334 }
335 }
336 }
337
338 /*
339 * Find the backwards transitive closure of the flow graph. These sets
340 * are backwards in the sense that we find the set of nodes that reach
341 * a given node, not the set of nodes that can be reached by a node.
342 *
343 * Assumes graph has been leveled.
344 */
345 static void
346 find_closure(root)
347 struct block *root;
348 {
349 int i;
350 struct block *b;
351
352 /*
353 * Initialize sets to contain no nodes.
354 */
355 memset((char *)all_closure_sets, 0,
356 n_blocks * nodewords * sizeof(*all_closure_sets));
357
358 /* root->level is the highest level number found. */
359 for (i = root->level; i >= 0; --i) {
360 for (b = levels[i]; b; b = b->link) {
361 SET_INSERT(b->closure, b->id);
362 if (JT(b) == 0)
363 continue;
364 SET_UNION(JT(b)->closure, b->closure, nodewords);
365 SET_UNION(JF(b)->closure, b->closure, nodewords);
366 }
367 }
368 }
369
370 /*
371 * Return the register number that is used by s. If A and X are both
372 * used, return AX_ATOM. If no register is used, return -1.
373 *
374 * The implementation should probably change to an array access.
375 */
376 static int
377 atomuse(s)
378 struct stmt *s;
379 {
380 register int c = s->code;
381
382 if (c == NOP)
383 return -1;
384
385 switch (BPF_CLASS(c)) {
386
387 case BPF_RET:
388 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
389 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
390
391 case BPF_LD:
392 case BPF_LDX:
393 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
394 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
395
396 case BPF_ST:
397 return A_ATOM;
398
399 case BPF_STX:
400 return X_ATOM;
401
402 case BPF_JMP:
403 case BPF_ALU:
404 if (BPF_SRC(c) == BPF_X)
405 return AX_ATOM;
406 return A_ATOM;
407
408 case BPF_MISC:
409 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
410 }
411 abort();
412 /* NOTREACHED */
413 }
414
415 /*
416 * Return the register number that is defined by 's'. We assume that
417 * a single stmt cannot define more than one register. If no register
418 * is defined, return -1.
419 *
420 * The implementation should probably change to an array access.
421 */
422 static int
423 atomdef(s)
424 struct stmt *s;
425 {
426 if (s->code == NOP)
427 return -1;
428
429 switch (BPF_CLASS(s->code)) {
430
431 case BPF_LD:
432 case BPF_ALU:
433 return A_ATOM;
434
435 case BPF_LDX:
436 return X_ATOM;
437
438 case BPF_ST:
439 case BPF_STX:
440 return s->k;
441
442 case BPF_MISC:
443 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
444 }
445 return -1;
446 }
447
448 static void
449 compute_local_ud(b)
450 struct block *b;
451 {
452 struct slist *s;
453 atomset def = 0, use = 0, kill = 0;
454 int atom;
455
456 for (s = b->stmts; s; s = s->next) {
457 if (s->s.code == NOP)
458 continue;
459 atom = atomuse(&s->s);
460 if (atom >= 0) {
461 if (atom == AX_ATOM) {
462 if (!ATOMELEM(def, X_ATOM))
463 use |= ATOMMASK(X_ATOM);
464 if (!ATOMELEM(def, A_ATOM))
465 use |= ATOMMASK(A_ATOM);
466 }
467 else if (atom < N_ATOMS) {
468 if (!ATOMELEM(def, atom))
469 use |= ATOMMASK(atom);
470 }
471 else
472 abort();
473 }
474 atom = atomdef(&s->s);
475 if (atom >= 0) {
476 if (!ATOMELEM(use, atom))
477 kill |= ATOMMASK(atom);
478 def |= ATOMMASK(atom);
479 }
480 }
481 if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
482 use |= ATOMMASK(A_ATOM);
483
484 b->def = def;
485 b->kill = kill;
486 b->in_use = use;
487 }
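/*
 * For a statement list like "ldx M[5]; st M[7]", for instance, cell 5
 * and the accumulator are read before being written (so they land in
 * 'use'), while X and cell 7 are written before any read (so they
 * land in both 'def' and 'kill'); a block ending in a conditional
 * jump additionally uses A, per the test above.
 */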
488
489 /*
490 * Assume graph is already leveled.
491 */
492 static void
493 find_ud(root)
494 struct block *root;
495 {
496 int i, maxlevel;
497 struct block *p;
498
499 /*
500 * root->level is the highest level number found;
501 * count down from there.
502 */
503 maxlevel = root->level;
504 for (i = maxlevel; i >= 0; --i)
505 for (p = levels[i]; p; p = p->link) {
506 compute_local_ud(p);
507 p->out_use = 0;
508 }
509
510 for (i = 1; i <= maxlevel; ++i) {
511 for (p = levels[i]; p; p = p->link) {
512 p->out_use |= JT(p)->in_use | JF(p)->in_use;
513 p->in_use |= p->out_use &~ p->kill;
514 }
515 }
516 }
517
518 /*
519 * These data structures are used in a Cocke and Schwartz style
520 * value numbering scheme. Since the flowgraph is acyclic,
521 * exit values can be propagated from a node's predecessors
522 * provided it is uniquely defined.
523 */
524 struct valnode {
525 int code;
526 int v0, v1;
527 int val;
528 struct valnode *next;
529 };
530
531 #define MODULUS 213
532 static struct valnode *hashtbl[MODULUS];
533 static int curval;
534 static int maxval;
535
536 /* Integer constants mapped with the load immediate opcode. */
537 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
538
539 struct vmapinfo {
540 int is_const;
541 bpf_int32 const_val;
542 };
543
544 struct vmapinfo *vmap;
545 struct valnode *vnode_base;
546 struct valnode *next_vnode;
547
548 static void
549 init_val()
550 {
551 curval = 0;
552 next_vnode = vnode_base;
553 memset((char *)vmap, 0, maxval * sizeof(*vmap));
554 memset((char *)hashtbl, 0, sizeof hashtbl);
555 }
556
557 /* Because we really don't have an IR, this stuff is a little messy. */
558 static int
559 F(code, v0, v1)
560 int code;
561 int v0, v1;
562 {
563 u_int hash;
564 int val;
565 struct valnode *p;
566
567 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
568 hash %= MODULUS;
569
570 for (p = hashtbl[hash]; p; p = p->next)
571 if (p->code == code && p->v0 == v0 && p->v1 == v1)
572 return p->val;
573
574 val = ++curval;
575 if (BPF_MODE(code) == BPF_IMM &&
576 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
577 vmap[val].const_val = v0;
578 vmap[val].is_const = 1;
579 }
580 p = next_vnode++;
581 p->val = val;
582 p->code = code;
583 p->v0 = v0;
584 p->v1 = v1;
585 p->next = hashtbl[hash];
586 hashtbl[hash] = p;
587
588 return val;
589 }
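/*
 * For example, two occurrences of K(2) hash to the same
 * (code, v0, v1) triple and therefore share one value number, while
 * K(3) gets a fresh one; for immediates, vmap[] also records the
 * constant itself, which is what opt_stmt() and opt_peep() later
 * test through vmap[v].is_const.
 */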
590
591 static __inline void
592 vstore(s, valp, newval, alter)
593 struct stmt *s;
594 int *valp;
595 int newval;
596 int alter;
597 {
598 if (alter && *valp == newval)
599 s->code = NOP;
600 else
601 *valp = newval;
602 }
603
604 static void
605 fold_op(s, v0, v1)
606 struct stmt *s;
607 int v0, v1;
608 {
609 bpf_int32 a, b;
610
611 a = vmap[v0].const_val;
612 b = vmap[v1].const_val;
613
614 switch (BPF_OP(s->code)) {
615 case BPF_ADD:
616 a += b;
617 break;
618
619 case BPF_SUB:
620 a -= b;
621 break;
622
623 case BPF_MUL:
624 a *= b;
625 break;
626
627 case BPF_DIV:
628 if (b == 0)
629 bpf_error("division by zero");
630 a /= b;
631 break;
632
633 case BPF_AND:
634 a &= b;
635 break;
636
637 case BPF_OR:
638 a |= b;
639 break;
640
641 case BPF_LSH:
642 a <<= b;
643 break;
644
645 case BPF_RSH:
646 a >>= b;
647 break;
648
649 case BPF_NEG:
650 a = -a;
651 break;
652
653 default:
654 abort();
655 }
656 s->k = a;
657 s->code = BPF_LD|BPF_IMM;
658 done = 0;
659 }
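/*
 * E.g. once value numbering knows the accumulator holds the constant
 * 2, a following "add #3" is folded here into "ld #5": the ALU
 * instruction is rewritten in place to BPF_LD|BPF_IMM with k == 5,
 * and the now-dead "ld #2" that produced the old accumulator value
 * is NOPped later by deadstmt() (assuming nothing read A in between).
 */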
660
661 static __inline struct slist *
662 this_op(s)
663 struct slist *s;
664 {
665 while (s != 0 && s->s.code == NOP)
666 s = s->next;
667 return s;
668 }
669
670 static void
671 opt_not(b)
672 struct block *b;
673 {
674 struct block *tmp = JT(b);
675
676 JT(b) = JF(b);
677 JF(b) = tmp;
678 }
679
680 static void
681 opt_peep(b)
682 struct block *b;
683 {
684 struct slist *s;
685 struct slist *next, *last;
686 int val;
687
688 s = b->stmts;
689 if (s == 0)
690 return;
691
692 last = s;
693 while (1) {
694 s = this_op(s);
695 if (s == 0)
696 break;
697 next = this_op(s->next);
698 if (next == 0)
699 break;
700 last = next;
701
702 /*
703 * st M[k] --> st M[k]
704 * ldx M[k] tax
705 */
706 if (s->s.code == BPF_ST &&
707 next->s.code == (BPF_LDX|BPF_MEM) &&
708 s->s.k == next->s.k) {
709 done = 0;
710 next->s.code = BPF_MISC|BPF_TAX;
711 }
712 /*
713 * ld #k --> ldx #k
714 * tax txa
715 */
716 if (s->s.code == (BPF_LD|BPF_IMM) &&
717 next->s.code == (BPF_MISC|BPF_TAX)) {
718 s->s.code = BPF_LDX|BPF_IMM;
719 next->s.code = BPF_MISC|BPF_TXA;
720 done = 0;
721 }
722 /*
723 * This is an ugly special case, but it happens
724 * when you say tcp[k] or udp[k] where k is a constant.
725 */
726 if (s->s.code == (BPF_LD|BPF_IMM)) {
727 struct slist *add, *tax, *ild;
728
729 /*
730 * Check that X isn't used on exit from this
731 * block (which the optimizer might cause).
732 * We know the code generator won't generate
733 * any local dependencies.
734 */
735 if (ATOMELEM(b->out_use, X_ATOM))
736 break;
737
738 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
739 add = next;
740 else
741 add = this_op(next->next);
742 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
743 break;
744
745 tax = this_op(add->next);
746 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
747 break;
748
749 ild = this_op(tax->next);
750 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
751 BPF_MODE(ild->s.code) != BPF_IND)
752 break;
753 /*
754 * XXX We need to check that X is not
755 * subsequently used. We know we can eliminate the
756 * accumulator modifications since it is defined
757 * by the last stmt of this sequence.
758 *
759 * We want to turn this sequence:
760 *
761 * (004) ldi #0x2 {s}
762 * (005) ldxms [14] {next} -- optional
763 * (006) addx {add}
764 * (007) tax {tax}
765 * (008) ild [x+0] {ild}
766 *
767 * into this sequence:
768 *
769 * (004) nop
770 * (005) ldxms [14]
771 * (006) nop
772 * (007) nop
773 * (008) ild [x+2]
774 *
775 */
776 ild->s.k += s->s.k;
777 s->s.code = NOP;
778 add->s.code = NOP;
779 tax->s.code = NOP;
780 done = 0;
781 }
782 s = next;
783 }
784 /*
785 * If we have a subtract to do a comparison, and the X register
786 * is a known constant, we can merge this value into the
787 * comparison.
788 */
789 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
790 !ATOMELEM(b->out_use, A_ATOM)) {
791 val = b->val[X_ATOM];
792 if (vmap[val].is_const) {
793 int op;
794
795 b->s.k += vmap[val].const_val;
796 op = BPF_OP(b->s.code);
797 if (op == BPF_JGT || op == BPF_JGE) {
798 struct block *t = JT(b);
799 JT(b) = JF(b);
800 JF(b) = t;
801 b->s.k += 0x80000000;
802 }
803 last->s.code = NOP;
804 done = 0;
805 } else if (b->s.k == 0) {
806 /*
807 * sub x -> nop
808 * j #0 j x
809 */
810 last->s.code = NOP;
811 b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
812 BPF_X;
813 done = 0;
814 }
815 }
816 /*
817 * Likewise, a constant subtract can be simplified.
818 */
819 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
820 !ATOMELEM(b->out_use, A_ATOM)) {
821 int op;
822
823 b->s.k += last->s.k;
824 last->s.code = NOP;
825 op = BPF_OP(b->s.code);
826 if (op == BPF_JGT || op == BPF_JGE) {
827 struct block *t = JT(b);
828 JT(b) = JF(b);
829 JF(b) = t;
830 b->s.k += 0x80000000;
831 }
832 done = 0;
833 }
834 /*
835 * and #k nop
836 * jeq #0 -> jset #k
837 */
838 if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
839 !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
840 b->s.k = last->s.k;
841 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
842 last->s.code = NOP;
843 done = 0;
844 opt_not(b);
845 }
846 /*
847 * If the accumulator is a known constant, we can compute the
848 * comparison result.
849 */
850 val = b->val[A_ATOM];
851 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
852 bpf_int32 v = vmap[val].const_val;
853 switch (BPF_OP(b->s.code)) {
854
855 case BPF_JEQ:
856 v = v == b->s.k;
857 break;
858
859 case BPF_JGT:
860 v = (unsigned)v > b->s.k;
861 break;
862
863 case BPF_JGE:
864 v = (unsigned)v >= b->s.k;
865 break;
866
867 case BPF_JSET:
868 v &= b->s.k;
869 break;
870
871 default:
872 abort();
873 }
874 if (JF(b) != JT(b))
875 done = 0;
876 if (v)
877 JF(b) = JT(b);
878 else
879 JT(b) = JF(b);
880 }
881 }
882
883 /*
884 * Compute the symbolic value of the expression 's', and update
885 * anything it defines in the value table 'val'. If 'alter' is true,
886 * do various optimizations. This code would be cleaner if symbolic
887 * evaluation and code transformations weren't folded together.
888 */
889 static void
890 opt_stmt(s, val, alter)
891 struct stmt *s;
892 int val[];
893 int alter;
894 {
895 int op;
896 int v;
897
898 switch (s->code) {
899
900 case BPF_LD|BPF_ABS|BPF_W:
901 case BPF_LD|BPF_ABS|BPF_H:
902 case BPF_LD|BPF_ABS|BPF_B:
903 v = F(s->code, s->k, 0L);
904 vstore(s, &val[A_ATOM], v, alter);
905 break;
906
907 case BPF_LD|BPF_IND|BPF_W:
908 case BPF_LD|BPF_IND|BPF_H:
909 case BPF_LD|BPF_IND|BPF_B:
910 v = val[X_ATOM];
911 if (alter && vmap[v].is_const) {
912 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
913 s->k += vmap[v].const_val;
914 v = F(s->code, s->k, 0L);
915 done = 0;
916 }
917 else
918 v = F(s->code, s->k, v);
919 vstore(s, &val[A_ATOM], v, alter);
920 break;
921
922 case BPF_LD|BPF_LEN:
923 v = F(s->code, 0L, 0L);
924 vstore(s, &val[A_ATOM], v, alter);
925 break;
926
927 case BPF_LD|BPF_IMM:
928 v = K(s->k);
929 vstore(s, &val[A_ATOM], v, alter);
930 break;
931
932 case BPF_LDX|BPF_IMM:
933 v = K(s->k);
934 vstore(s, &val[X_ATOM], v, alter);
935 break;
936
937 case BPF_LDX|BPF_MSH|BPF_B:
938 v = F(s->code, s->k, 0L);
939 vstore(s, &val[X_ATOM], v, alter);
940 break;
941
942 case BPF_ALU|BPF_NEG:
943 if (alter && vmap[val[A_ATOM]].is_const) {
944 s->code = BPF_LD|BPF_IMM;
945 s->k = -vmap[val[A_ATOM]].const_val;
946 val[A_ATOM] = K(s->k);
947 }
948 else
949 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
950 break;
951
952 case BPF_ALU|BPF_ADD|BPF_K:
953 case BPF_ALU|BPF_SUB|BPF_K:
954 case BPF_ALU|BPF_MUL|BPF_K:
955 case BPF_ALU|BPF_DIV|BPF_K:
956 case BPF_ALU|BPF_AND|BPF_K:
957 case BPF_ALU|BPF_OR|BPF_K:
958 case BPF_ALU|BPF_LSH|BPF_K:
959 case BPF_ALU|BPF_RSH|BPF_K:
960 op = BPF_OP(s->code);
961 if (alter) {
962 if (s->k == 0) {
963 if (op == BPF_ADD || op == BPF_SUB ||
964 op == BPF_LSH || op == BPF_RSH ||
965 op == BPF_OR) {
966 s->code = NOP;
967 break;
968 }
969 if (op == BPF_MUL || op == BPF_AND) {
970 s->code = BPF_LD|BPF_IMM;
971 val[A_ATOM] = K(s->k);
972 break;
973 }
974 }
975 if (vmap[val[A_ATOM]].is_const) {
976 fold_op(s, val[A_ATOM], K(s->k));
977 val[A_ATOM] = K(s->k);
978 break;
979 }
980 }
981 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
982 break;
983
984 case BPF_ALU|BPF_ADD|BPF_X:
985 case BPF_ALU|BPF_SUB|BPF_X:
986 case BPF_ALU|BPF_MUL|BPF_X:
987 case BPF_ALU|BPF_DIV|BPF_X:
988 case BPF_ALU|BPF_AND|BPF_X:
989 case BPF_ALU|BPF_OR|BPF_X:
990 case BPF_ALU|BPF_LSH|BPF_X:
991 case BPF_ALU|BPF_RSH|BPF_X:
992 op = BPF_OP(s->code);
993 if (alter && vmap[val[X_ATOM]].is_const) {
994 if (vmap[val[A_ATOM]].is_const) {
995 fold_op(s, val[A_ATOM], val[X_ATOM]);
996 val[A_ATOM] = K(s->k);
997 }
998 else {
999 s->code = BPF_ALU|BPF_K|op;
1000 s->k = vmap[val[X_ATOM]].const_val;
1001 done = 0;
1002 val[A_ATOM] =
1003 F(s->code, val[A_ATOM], K(s->k));
1004 }
1005 break;
1006 }
1007 /*
1008 * Check if we're doing something to an accumulator
1009 * that is 0, and simplify. This may not seem like
1010 * much of a simplification but it could open up further
1011 * optimizations.
1012 * XXX We could also check for mul by 1, and -1, etc.
1013 */
1014 if (alter && vmap[val[A_ATOM]].is_const
1015 && vmap[val[A_ATOM]].const_val == 0) {
1016 if (op == BPF_ADD || op == BPF_OR ||
1017 op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
1018 s->code = BPF_MISC|BPF_TXA;
1019 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1020 break;
1021 }
1022 else if (op == BPF_MUL || op == BPF_DIV ||
1023 op == BPF_AND) {
1024 s->code = BPF_LD|BPF_IMM;
1025 s->k = 0;
1026 vstore(s, &val[A_ATOM], K(s->k), alter);
1027 break;
1028 }
1029 else if (op == BPF_NEG) {
1030 s->code = NOP;
1031 break;
1032 }
1033 }
1034 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1035 break;
1036
1037 case BPF_MISC|BPF_TXA:
1038 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1039 break;
1040
1041 case BPF_LD|BPF_MEM:
1042 v = val[s->k];
1043 if (alter && vmap[v].is_const) {
1044 s->code = BPF_LD|BPF_IMM;
1045 s->k = vmap[v].const_val;
1046 done = 0;
1047 }
1048 vstore(s, &val[A_ATOM], v, alter);
1049 break;
1050
1051 case BPF_MISC|BPF_TAX:
1052 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1053 break;
1054
1055 case BPF_LDX|BPF_MEM:
1056 v = val[s->k];
1057 if (alter && vmap[v].is_const) {
1058 s->code = BPF_LDX|BPF_IMM;
1059 s->k = vmap[v].const_val;
1060 done = 0;
1061 }
1062 vstore(s, &val[X_ATOM], v, alter);
1063 break;
1064
1065 case BPF_ST:
1066 vstore(s, &val[s->k], val[A_ATOM], alter);
1067 break;
1068
1069 case BPF_STX:
1070 vstore(s, &val[s->k], val[X_ATOM], alter);
1071 break;
1072 }
1073 }
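/*
 * Two typical rewrites performed above when 'alter' is set: an
 * indirect load whose index register holds a known constant,
 *
 *	ldx #14			ldx #14
 *	ld [x + 40]	  -->	ld [54]
 *
 * and a load from a scratch cell whose value number is a known
 * constant, which turns "ld M[k]" into the corresponding "ld #n".
 */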
1074
1075 static void
1076 deadstmt(s, last)
1077 register struct stmt *s;
1078 register struct stmt *last[];
1079 {
1080 register int atom;
1081
1082 atom = atomuse(s);
1083 if (atom >= 0) {
1084 if (atom == AX_ATOM) {
1085 last[X_ATOM] = 0;
1086 last[A_ATOM] = 0;
1087 }
1088 else
1089 last[atom] = 0;
1090 }
1091 atom = atomdef(s);
1092 if (atom >= 0) {
1093 if (last[atom]) {
1094 done = 0;
1095 last[atom]->code = NOP;
1096 }
1097 last[atom] = s;
1098 }
1099 }
1100
1101 static void
1102 opt_deadstores(b)
1103 register struct block *b;
1104 {
1105 register struct slist *s;
1106 register int atom;
1107 struct stmt *last[N_ATOMS];
1108
1109 memset((char *)last, 0, sizeof last);
1110
1111 for (s = b->stmts; s != 0; s = s->next)
1112 deadstmt(&s->s, last);
1113 deadstmt(&b->s, last);
1114
1115 for (atom = 0; atom < N_ATOMS; ++atom)
1116 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1117 last[atom]->code = NOP;
1118 done = 0;
1119 }
1120 }
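/*
 * So a scratch cell that is written twice with no intervening read
 * ("st M[3]; ...; st M[3]") loses its first store in deadstmt(), and
 * a final definition of any atom that is not in b->out_use is NOPped
 * here, since no successor block will ever read it.
 */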
1121
1122 static void
1123 opt_blk(b, do_stmts)
1124 struct block *b;
1125 int do_stmts;
1126 {
1127 struct slist *s;
1128 struct edge *p;
1129 int i;
1130 bpf_int32 aval;
1131
1132 /*
1133 * Initialize the atom values.
1134 * If we have no predecessors, everything is undefined.
1135 * Otherwise, we inherit our values from our predecessors.
1136 * If any register has an ambiguous value (i.e. control paths are
1137 * merging) give it the undefined value of 0.
1138 */
1139 p = b->in_edges;
1140 if (p == 0)
1141 memset((char *)b->val, 0, sizeof(b->val));
1142 else {
1143 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1144 while ((p = p->next) != NULL) {
1145 for (i = 0; i < N_ATOMS; ++i)
1146 if (b->val[i] != p->pred->val[i])
1147 b->val[i] = 0;
1148 }
1149 }
1150 aval = b->val[A_ATOM];
1151 for (s = b->stmts; s; s = s->next)
1152 opt_stmt(&s->s, b->val, do_stmts);
1153
1154 /*
1155 * This is a special case: if we don't use anything from this
1156 * block, and we load the accumulator with a value that is
1157 * already there, or if this block is a return,
1158 * eliminate all the statements.
1159 */
1160 if (do_stmts &&
1161 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
1162 BPF_CLASS(b->s.code) == BPF_RET)) {
1163 if (b->stmts != 0) {
1164 b->stmts = 0;
1165 done = 0;
1166 }
1167 } else {
1168 opt_peep(b);
1169 opt_deadstores(b);
1170 }
1171 /*
1172 * Set up values for branch optimizer.
1173 */
1174 if (BPF_SRC(b->s.code) == BPF_K)
1175 b->oval = K(b->s.k);
1176 else
1177 b->oval = b->val[X_ATOM];
1178 b->et.code = b->s.code;
1179 b->ef.code = -b->s.code;
1180 }
1181
1182 /*
1183 * Return true if any register that is used on exit from 'succ', has
1184 * an exit value that is different from the corresponding exit value
1185 * from 'b'.
1186 */
1187 static int
1188 use_conflict(b, succ)
1189 struct block *b, *succ;
1190 {
1191 int atom;
1192 atomset use = succ->out_use;
1193
1194 if (use == 0)
1195 return 0;
1196
1197 for (atom = 0; atom < N_ATOMS; ++atom)
1198 if (ATOMELEM(use, atom))
1199 if (b->val[atom] != succ->val[atom])
1200 return 1;
1201 return 0;
1202 }
1203
1204 static struct block *
1205 fold_edge(child, ep)
1206 struct block *child;
1207 struct edge *ep;
1208 {
1209 int sense;
1210 int aval0, aval1, oval0, oval1;
1211 int code = ep->code;
1212
1213 if (code < 0) {
1214 code = -code;
1215 sense = 0;
1216 } else
1217 sense = 1;
1218
1219 if (child->s.code != code)
1220 return 0;
1221
1222 aval0 = child->val[A_ATOM];
1223 oval0 = child->oval;
1224 aval1 = ep->pred->val[A_ATOM];
1225 oval1 = ep->pred->oval;
1226
1227 if (aval0 != aval1)
1228 return 0;
1229
1230 if (oval0 == oval1)
1231 /*
1232 * The operands are identical, so the
1233 * result is true if a true branch was
1234 * taken to get here, otherwise false.
1235 */
1236 return sense ? JT(child) : JF(child);
1237
1238 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1239 /*
1240 * At this point, we only know the comparison if we
1241 * came down the true branch, and it was an equality
1242 * comparison with a constant. We rely on the fact that
1243 * distinct constants have distinct value numbers.
1244 */
1245 return JF(child);
1246
1247 return 0;
1248 }
1249
1250 static void
1251 opt_j(ep)
1252 struct edge *ep;
1253 {
1254 register int i, k;
1255 register struct block *target;
1256
1257 if (JT(ep->succ) == 0)
1258 return;
1259
1260 if (JT(ep->succ) == JF(ep->succ)) {
1261 /*
1262 * Common branch targets can be eliminated, provided
1263 * there is no data dependency.
1264 */
1265 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1266 done = 0;
1267 ep->succ = JT(ep->succ);
1268 }
1269 }
1270 /*
1271 * For each edge dominator that matches the successor of this
1272 * edge, promote the edge successor to its grandchild.
1273 *
1274 * XXX We violate the set abstraction here in favor of a reasonably
1275 * efficient loop.
1276 */
1277 top:
1278 for (i = 0; i < edgewords; ++i) {
1279 register bpf_u_int32 x = ep->edom[i];
1280
1281 while (x != 0) {
1282 k = ffs(x) - 1;
1283 x &=~ (1 << k);
1284 k += i * BITS_PER_WORD;
1285
1286 target = fold_edge(ep->succ, edges[k]);
1287 /*
1288 * Check that there is no data dependency between
1289 * nodes that will be violated if we move the edge.
1290 */
1291 if (target != 0 && !use_conflict(ep->pred, target)) {
1292 done = 0;
1293 ep->succ = target;
1294 if (JT(target) != 0)
1295 /*
1296 * Start over unless we hit a leaf.
1297 */
1298 goto top;
1299 return;
1300 }
1301 }
1302 }
1303 }
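/*
 * For instance, with a filter like "tcp or udp" the ethertype test
 * "jeq #0x800" is generated twice; the edge reaching the second copy
 * from the failed tcp-protocol test is dominated by the true edge of
 * the first copy and compares the same value number against the same
 * constant, so fold_edge() can predict its outcome and opt_j()
 * retargets the edge straight to the second copy's true successor,
 * skipping the redundant test.
 */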
1304
1305
1306 static void
1307 or_pullup(b)
1308 struct block *b;
1309 {
1310 int val, at_top;
1311 struct block *pull;
1312 struct block **diffp, **samep;
1313 struct edge *ep;
1314
1315 ep = b->in_edges;
1316 if (ep == 0)
1317 return;
1318
1319 /*
1320 * Make sure each predecessor loads the same value.
1321 * XXX why?
1322 */
1323 val = ep->pred->val[A_ATOM];
1324 for (ep = ep->next; ep != 0; ep = ep->next)
1325 if (val != ep->pred->val[A_ATOM])
1326 return;
1327
1328 if (JT(b->in_edges->pred) == b)
1329 diffp = &JT(b->in_edges->pred);
1330 else
1331 diffp = &JF(b->in_edges->pred);
1332
1333 at_top = 1;
1334 while (1) {
1335 if (*diffp == 0)
1336 return;
1337
1338 if (JT(*diffp) != JT(b))
1339 return;
1340
1341 if (!SET_MEMBER((*diffp)->dom, b->id))
1342 return;
1343
1344 if ((*diffp)->val[A_ATOM] != val)
1345 break;
1346
1347 diffp = &JF(*diffp);
1348 at_top = 0;
1349 }
1350 samep = &JF(*diffp);
1351 while (1) {
1352 if (*samep == 0)
1353 return;
1354
1355 if (JT(*samep) != JT(b))
1356 return;
1357
1358 if (!SET_MEMBER((*samep)->dom, b->id))
1359 return;
1360
1361 if ((*samep)->val[A_ATOM] == val)
1362 break;
1363
1364 /* XXX Need to check that there are no data dependencies
1365 between diffp and samep. Currently, the code generator
1366 will not produce such dependencies. */
1367 samep = &JF(*samep);
1368 }
1369 #ifdef notdef
1370 /* XXX This doesn't cover everything. */
1371 for (i = 0; i < N_ATOMS; ++i)
1372 if ((*samep)->val[i] != pred->val[i])
1373 return;
1374 #endif
1375 /* Pull up the node. */
1376 pull = *samep;
1377 *samep = JF(pull);
1378 JF(pull) = *diffp;
1379
1380 /*
1381 * At the top of the chain, each predecessor needs to point at the
1382 * pulled up node. Inside the chain, there is only one predecessor
1383 * to worry about.
1384 */
1385 if (at_top) {
1386 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1387 if (JT(ep->pred) == b)
1388 JT(ep->pred) = pull;
1389 else
1390 JF(ep->pred) = pull;
1391 }
1392 }
1393 else
1394 *diffp = pull;
1395
1396 done = 0;
1397 }
1398
1399 static void
1400 and_pullup(b)
1401 struct block *b;
1402 {
1403 int val, at_top;
1404 struct block *pull;
1405 struct block **diffp, **samep;
1406 struct edge *ep;
1407
1408 ep = b->in_edges;
1409 if (ep == 0)
1410 return;
1411
1412 /*
1413 * Make sure each predecessor loads the same value.
1414 */
1415 val = ep->pred->val[A_ATOM];
1416 for (ep = ep->next; ep != 0; ep = ep->next)
1417 if (val != ep->pred->val[A_ATOM])
1418 return;
1419
1420 if (JT(b->in_edges->pred) == b)
1421 diffp = &JT(b->in_edges->pred);
1422 else
1423 diffp = &JF(b->in_edges->pred);
1424
1425 at_top = 1;
1426 while (1) {
1427 if (*diffp == 0)
1428 return;
1429
1430 if (JF(*diffp) != JF(b))
1431 return;
1432
1433 if (!SET_MEMBER((*diffp)->dom, b->id))
1434 return;
1435
1436 if ((*diffp)->val[A_ATOM] != val)
1437 break;
1438
1439 diffp = &JT(*diffp);
1440 at_top = 0;
1441 }
1442 samep = &JT(*diffp);
1443 while (1) {
1444 if (*samep == 0)
1445 return;
1446
1447 if (JF(*samep) != JF(b))
1448 return;
1449
1450 if (!SET_MEMBER((*samep)->dom, b->id))
1451 return;
1452
1453 if ((*samep)->val[A_ATOM] == val)
1454 break;
1455
1456 /* XXX Need to check that there are no data dependencies
1457 between diffp and samep. Currently, the code generator
1458 will not produce such dependencies. */
1459 samep = &JT(*samep);
1460 }
1461 #ifdef notdef
1462 /* XXX This doesn't cover everything. */
1463 for (i = 0; i < N_ATOMS; ++i)
1464 if ((*samep)->val[i] != pred->val[i])
1465 return;
1466 #endif
1467 /* Pull up the node. */
1468 pull = *samep;
1469 *samep = JT(pull);
1470 JT(pull) = *diffp;
1471
1472 /*
1473 * At the top of the chain, each predecessor needs to point at the
1474 * pulled up node. Inside the chain, there is only one predecessor
1475 * to worry about.
1476 */
1477 if (at_top) {
1478 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1479 if (JT(ep->pred) == b)
1480 JT(ep->pred) = pull;
1481 else
1482 JF(ep->pred) = pull;
1483 }
1484 }
1485 else
1486 *diffp = pull;
1487
1488 done = 0;
1489 }
1490
1491 static void
1492 opt_blks(root, do_stmts)
1493 struct block *root;
1494 int do_stmts;
1495 {
1496 int i, maxlevel;
1497 struct block *p;
1498
1499 init_val();
1500 maxlevel = root->level;
1501 for (i = maxlevel; i >= 0; --i)
1502 for (p = levels[i]; p; p = p->link)
1503 opt_blk(p, do_stmts);
1504
1505 if (do_stmts)
1506 /*
1507 * No point trying to move branches; it can't possibly
1508 * make a difference at this point.
1509 */
1510 return;
1511
1512 for (i = 1; i <= maxlevel; ++i) {
1513 for (p = levels[i]; p; p = p->link) {
1514 opt_j(&p->et);
1515 opt_j(&p->ef);
1516 }
1517 }
1518 for (i = 1; i <= maxlevel; ++i) {
1519 for (p = levels[i]; p; p = p->link) {
1520 or_pullup(p);
1521 and_pullup(p);
1522 }
1523 }
1524 }
1525
1526 static __inline void
1527 link_inedge(parent, child)
1528 struct edge *parent;
1529 struct block *child;
1530 {
1531 parent->next = child->in_edges;
1532 child->in_edges = parent;
1533 }
1534
1535 static void
1536 find_inedges(root)
1537 struct block *root;
1538 {
1539 int i;
1540 struct block *b;
1541
1542 for (i = 0; i < n_blocks; ++i)
1543 blocks[i]->in_edges = 0;
1544
1545 /*
1546 * Traverse the graph, adding each edge to the predecessor
1547 * list of its successors. Skip the leaves (i.e. level 0).
1548 */
1549 for (i = root->level; i > 0; --i) {
1550 for (b = levels[i]; b != 0; b = b->link) {
1551 link_inedge(&b->et, JT(b));
1552 link_inedge(&b->ef, JF(b));
1553 }
1554 }
1555 }
1556
1557 static void
1558 opt_root(b)
1559 struct block **b;
1560 {
1561 struct slist *tmp, *s;
1562
1563 s = (*b)->stmts;
1564 (*b)->stmts = 0;
1565 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1566 *b = JT(*b);
1567
1568 tmp = (*b)->stmts;
1569 if (tmp != 0)
1570 sappend(s, tmp);
1571 (*b)->stmts = s;
1572
1573 /*
1574 * If the root node is a return, then there is no
1575 * point executing any statements (since the bpf machine
1576 * has no side effects).
1577 */
1578 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1579 (*b)->stmts = 0;
1580 }
1581
1582 static void
1583 opt_loop(root, do_stmts)
1584 struct block *root;
1585 int do_stmts;
1586 {
1587
1588 #ifdef BDEBUG
1589 if (dflag > 1)
1590 opt_dump(root);
1591 #endif
1592 do {
1593 done = 1;
1594 find_levels(root);
1595 find_dom(root);
1596 find_closure(root);
1597 find_inedges(root);
1598 find_ud(root);
1599 find_edom(root);
1600 opt_blks(root, do_stmts);
1601 #ifdef BDEBUG
1602 if (dflag > 1)
1603 opt_dump(root);
1604 #endif
1605 } while (!done);
1606 }
1607
1608 /*
1609 * Optimize the filter code in its dag representation.
1610 */
1611 void
1612 bpf_optimize(rootp)
1613 struct block **rootp;
1614 {
1615 struct block *root;
1616
1617 root = *rootp;
1618
1619 opt_init(root);
1620 opt_loop(root, 0);
1621 opt_loop(root, 1);
1622 intern_blocks(root);
1623 opt_root(rootp);
1624 opt_cleanup();
1625 }
1626
1627 static void
1628 make_marks(p)
1629 struct block *p;
1630 {
1631 if (!isMarked(p)) {
1632 Mark(p);
1633 if (BPF_CLASS(p->s.code) != BPF_RET) {
1634 make_marks(JT(p));
1635 make_marks(JF(p));
1636 }
1637 }
1638 }
1639
1640 /*
1641 * Mark code array such that isMarked(i) is true
1642 * only for nodes that are alive.
1643 */
1644 static void
1645 mark_code(p)
1646 struct block *p;
1647 {
1648 cur_mark += 1;
1649 make_marks(p);
1650 }
1651
1652 /*
1653 * True iff the two stmt lists load the same value from the packet into
1654 * the accumulator.
1655 */
1656 static int
1657 eq_slist(x, y)
1658 struct slist *x, *y;
1659 {
1660 while (1) {
1661 while (x && x->s.code == NOP)
1662 x = x->next;
1663 while (y && y->s.code == NOP)
1664 y = y->next;
1665 if (x == 0)
1666 return y == 0;
1667 if (y == 0)
1668 return x == 0;
1669 if (x->s.code != y->s.code || x->s.k != y->s.k)
1670 return 0;
1671 x = x->next;
1672 y = y->next;
1673 }
1674 }
1675
1676 static __inline int
1677 eq_blk(b0, b1)
1678 struct block *b0, *b1;
1679 {
1680 if (b0->s.code == b1->s.code &&
1681 b0->s.k == b1->s.k &&
1682 b0->et.succ == b1->et.succ &&
1683 b0->ef.succ == b1->ef.succ)
1684 return eq_slist(b0->stmts, b1->stmts);
1685 return 0;
1686 }
1687
1688 static void
1689 intern_blocks(root)
1690 struct block *root;
1691 {
1692 struct block *p;
1693 int i, j;
1694 int done;
1695 top:
1696 done = 1;
1697 for (i = 0; i < n_blocks; ++i)
1698 blocks[i]->link = 0;
1699
1700 mark_code(root);
1701
1702 for (i = n_blocks - 1; --i >= 0; ) {
1703 if (!isMarked(blocks[i]))
1704 continue;
1705 for (j = i + 1; j < n_blocks; ++j) {
1706 if (!isMarked(blocks[j]))
1707 continue;
1708 if (eq_blk(blocks[i], blocks[j])) {
1709 blocks[i]->link = blocks[j]->link ?
1710 blocks[j]->link : blocks[j];
1711 break;
1712 }
1713 }
1714 }
1715 for (i = 0; i < n_blocks; ++i) {
1716 p = blocks[i];
1717 if (JT(p) == 0)
1718 continue;
1719 if (JT(p)->link) {
1720 done = 0;
1721 JT(p) = JT(p)->link;
1722 }
1723 if (JF(p)->link) {
1724 done = 0;
1725 JF(p) = JF(p)->link;
1726 }
1727 }
1728 if (!done)
1729 goto top;
1730 }
1731
1732 static void
1733 opt_cleanup()
1734 {
1735 free((void *)vnode_base);
1736 free((void *)vmap);
1737 free((void *)edges);
1738 free((void *)space);
1739 free((void *)levels);
1740 free((void *)blocks);
1741 }
1742
1743 /*
1744 * Return the number of stmts in 's'.
1745 */
1746 static int
1747 slength(s)
1748 struct slist *s;
1749 {
1750 int n = 0;
1751
1752 for (; s; s = s->next)
1753 if (s->s.code != NOP)
1754 ++n;
1755 return n;
1756 }
1757
1758 /*
1759 * Return the number of nodes reachable by 'p'.
1760 * All nodes should be initially unmarked.
1761 */
1762 static int
1763 count_blocks(p)
1764 struct block *p;
1765 {
1766 if (p == 0 || isMarked(p))
1767 return 0;
1768 Mark(p);
1769 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1770 }
1771
1772 /*
1773 * Do a depth first search on the flow graph, numbering the
1774 * basic blocks, and entering them into the 'blocks' array.
1775 */
1776 static void
1777 number_blks_r(p)
1778 struct block *p;
1779 {
1780 int n;
1781
1782 if (p == 0 || isMarked(p))
1783 return;
1784
1785 Mark(p);
1786 n = n_blocks++;
1787 p->id = n;
1788 blocks[n] = p;
1789
1790 number_blks_r(JT(p));
1791 number_blks_r(JF(p));
1792 }
1793
1794 /*
1795 * Return the number of stmts in the flowgraph reachable by 'p'.
1796 * The nodes should be unmarked before calling.
1797 */
1798 static int
1799 count_stmts(p)
1800 struct block *p;
1801 {
1802 int n;
1803
1804 if (p == 0 || isMarked(p))
1805 return 0;
1806 Mark(p);
1807 n = count_stmts(JT(p)) + count_stmts(JF(p));
1808 return slength(p->stmts) + n + 1;
1809 }
1810
1811 /*
1812 * Allocate memory. All allocation is done before optimization
1813 * is begun. A linear bound on the size of all data structures is computed
1814 * from the total number of blocks and/or statements.
1815 */
1816 static void
1817 opt_init(root)
1818 struct block *root;
1819 {
1820 bpf_u_int32 *p;
1821 int i, n, max_stmts;
1822
1823 /*
1824 * First, count the blocks, so we can malloc an array to map
1825 * block number to block. Then, put the blocks into the array.
1826 */
1827 unMarkAll();
1828 n = count_blocks(root);
1829 blocks = (struct block **)malloc(n * sizeof(*blocks));
1830 unMarkAll();
1831 n_blocks = 0;
1832 number_blks_r(root);
1833
1834 n_edges = 2 * n_blocks;
1835 edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1836
1837 /*
1838 * The number of levels is bounded by the number of nodes.
1839 */
1840 levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1841
1842 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1843 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1844
1845 /* XXX */
1846 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1847 + n_edges * edgewords * sizeof(*space));
1848 p = space;
1849 all_dom_sets = p;
1850 for (i = 0; i < n; ++i) {
1851 blocks[i]->dom = p;
1852 p += nodewords;
1853 }
1854 all_closure_sets = p;
1855 for (i = 0; i < n; ++i) {
1856 blocks[i]->closure = p;
1857 p += nodewords;
1858 }
1859 all_edge_sets = p;
1860 for (i = 0; i < n; ++i) {
1861 register struct block *b = blocks[i];
1862
1863 b->et.edom = p;
1864 p += edgewords;
1865 b->ef.edom = p;
1866 p += edgewords;
1867 b->et.id = i;
1868 edges[i] = &b->et;
1869 b->ef.id = n_blocks + i;
1870 edges[n_blocks + i] = &b->ef;
1871 b->et.pred = b;
1872 b->ef.pred = b;
1873 }
1874 max_stmts = 0;
1875 for (i = 0; i < n; ++i)
1876 max_stmts += slength(blocks[i]->stmts) + 1;
1877 /*
1878 * We allocate at most 3 value numbers per statement,
1879 * so this is an upper bound on the number of valnodes
1880 * we'll need.
1881 */
1882 maxval = 3 * max_stmts;
1883 vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1884 vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1885 }
1886
1887 /*
1888 * Some pointers used to convert the basic block form of the code
1889 * into the array form that BPF requires. 'fstart' will point to
1890 * the malloc'd array while 'ftail' is used during the recursive traversal.
1891 */
1892 static struct bpf_insn *fstart;
1893 static struct bpf_insn *ftail;
1894
1895 #ifdef BDEBUG
1896 int bids[1000];
1897 #endif
1898
1899 /*
1900 * Returns true if successful. Returns false if a branch has
1901 * an offset that is too large. If so, we have marked that
1902 * branch so that on a subsequent iteration, it will be treated
1903 * properly.
1904 */
1905 static int
1906 convert_code_r(p)
1907 struct block *p;
1908 {
1909 struct bpf_insn *dst;
1910 struct slist *src;
1911 int slen;
1912 u_int off;
1913 int extrajmps; /* number of extra jumps inserted */
1914
1915 if (p == 0 || isMarked(p))
1916 return (1);
1917 Mark(p);
1918
1919 if (convert_code_r(JF(p)) == 0)
1920 return (0);
1921 if (convert_code_r(JT(p)) == 0)
1922 return (0);
1923
1924 slen = slength(p->stmts);
1925 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1926 /* inflate length by any extra jumps */
1927
1928 p->offset = dst - fstart;
1929
1930 for (src = p->stmts; src; src = src->next) {
1931 if (src->s.code == NOP)
1932 continue;
1933 dst->code = (u_short)src->s.code;
1934 dst->k = src->s.k;
1935 ++dst;
1936 }
1937 #ifdef BDEBUG
1938 bids[dst - fstart] = p->id + 1;
1939 #endif
1940 dst->code = (u_short)p->s.code;
1941 dst->k = p->s.k;
1942 if (JT(p)) {
1943 extrajmps = 0;
1944 off = JT(p)->offset - (p->offset + slen) - 1;
1945 if (off >= 256) {
1946 /* offset too large for branch, must add a jump */
1947 if (p->longjt == 0) {
1948 /* mark this instruction and retry */
1949 p->longjt++;
1950 return(0);
1951 }
1952 /* branch if T to following jump */
1953 dst->jt = extrajmps;
1954 extrajmps++;
1955 dst[extrajmps].code = BPF_JMP|BPF_JA;
1956 dst[extrajmps].k = off - extrajmps;
1957 }
1958 else
1959 dst->jt = off;
1960 off = JF(p)->offset - (p->offset + slen) - 1;
1961 if (off >= 256) {
1962 /* offset too large for branch, must add a jump */
1963 if (p->longjf == 0) {
1964 /* mark this instruction and retry */
1965 p->longjf++;
1966 return(0);
1967 }
1968 /* branch if F to following jump */
1969 /* if two jumps are inserted, F goes to second one */
1970 dst->jf = extrajmps;
1971 extrajmps++;
1972 dst[extrajmps].code = BPF_JMP|BPF_JA;
1973 dst[extrajmps].k = off - extrajmps;
1974 }
1975 else
1976 dst->jf = off;
1977 }
1978 return (1);
1979 }
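/*
 * The jt/jf fields of a bpf_insn are only 8 bits wide, hence the
 * off >= 256 handling above: the conditional branch is pointed at an
 * unconditional BPF_JA inserted immediately after it, whose 32-bit
 * 'k' carries the real displacement, and longjt/longjf remember that
 * the extra slot must be reserved on the next conversion attempt.
 */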
1980
1981
1982 /*
1983 * Convert flowgraph intermediate representation to the
1984 * BPF array representation. Set *lenp to the number of instructions.
1985 */
1986 struct bpf_insn *
1987 icode_to_fcode(root, lenp)
1988 struct block *root;
1989 int *lenp;
1990 {
1991 int n;
1992 struct bpf_insn *fp;
1993
1994 /*
1995 * Loop doing convert_code_r() until no branches remain
1996 * with too-large offsets.
1997 */
1998 while (1) {
1999 unMarkAll();
2000 n = *lenp = count_stmts(root);
2001
2002 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2003 memset((char *)fp, 0, sizeof(*fp) * n);
2004 fstart = fp;
2005 ftail = fp + n;
2006
2007 unMarkAll();
2008 if (convert_code_r(root))
2009 break;
2010 free(fp);
2011 }
2012
2013 return fp;
2014 }
2015
2016 #ifdef BDEBUG
2017 static void
2018 opt_dump(root)
2019 struct block *root;
2020 {
2021 struct bpf_program f;
2022
2023 memset(bids, 0, sizeof bids);
2024 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2025 bpf_dump(&f, 1);
2026 putchar('\n');
2027 free((char *)f.bf_insns);
2028 }
2029 #endif