[apple/network_cmds.git] / pcap / optimize.c
1 /*
2 * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
7 * Reserved. This file contains Original Code and/or Modifications of
8 * Original Code as defined in and that are subject to the Apple Public
9 * Source License Version 1.0 (the 'License'). You may not use this file
10 * except in compliance with the License. Please obtain a copy of the
11 * License at http://www.apple.com/publicsource and read it before using
12 * this file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
19 * License for the specific language governing rights and limitations
20 * under the License."
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24 /* $OpenBSD: optimize.c,v 1.5 1996/09/16 02:33:07 tholo Exp $ */
25
26 /*
27 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
28 * The Regents of the University of California. All rights reserved.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that: (1) source code distributions
32 * retain the above copyright notice and this paragraph in its entirety, (2)
33 * distributions including binary code include the above copyright notice and
34 * this paragraph in its entirety in the documentation or other materials
35 * provided with the distribution, and (3) all advertising materials mentioning
36 * features or use of this software display the following acknowledgement:
37 * ``This product includes software developed by the University of California,
38 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
39 * the University nor the names of its contributors may be used to endorse
40 * or promote products derived from this software without specific prior
41 * written permission.
42 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
43 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
44 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
45 *
46 * Optimization module for tcpdump intermediate representation.
47 */
48 #ifndef lint
49 static char rcsid[] =
50 "@(#) Header: optimize.c,v 1.58 96/06/16 22:36:59 leres Exp (LBL)";
51 #endif
52
53 #include <sys/types.h>
54 #include <sys/time.h>
55
56 #include <net/bpf.h>
57
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <memory.h>
61
62 #ifdef HAVE_OS_PROTO_H
63 #include "os-proto.h"
64 #endif
65
66 #include "pcap-int.h"
67 #include "gencode.h"
68
69 #ifdef BDEBUG
70 extern int dflag;
71 #endif
72
73 #define A_ATOM BPF_MEMWORDS
74 #define X_ATOM (BPF_MEMWORDS+1)
75
76 #define NOP -1
77
78 /*
79 * This define is used to represent *both* the accumulator and
80 * x register in use-def computations.
81 * Currently, the use-def code assumes only one definition per instruction.
82 */
83 #define AX_ATOM N_ATOMS
84
85 /*
86 * A flag to indicate that further optimization is needed.
87 * Iterative passes are continued until a given pass yields no
88 * branch movement.
89 */
90 static int done;
91
92 /*
93 * A block is marked if and only if its mark equals the current mark.
94 * Rather than traverse the code array, marking each item, 'cur_mark' is
95 * incremented. This automatically makes each element unmarked.
96 */
97 static int cur_mark;
98 #define isMarked(p) ((p)->mark == cur_mark)
99 #define unMarkAll() cur_mark += 1
100 #define Mark(p) ((p)->mark = cur_mark)
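/*
 * For illustration: if cur_mark is 7, unMarkAll() bumps it to 8 and
 * every block whose 'mark' field still holds 7 (or any older value)
 * is instantly considered unmarked, without touching the blocks
 * themselves; Mark(p) then stamps p with the current value, 8.
 */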
101
102 static void opt_init(struct block *);
103 static void opt_cleanup(void);
104
105 static void make_marks(struct block *);
106 static void mark_code(struct block *);
107
108 static void intern_blocks(struct block *);
109
110 static int eq_slist(struct slist *, struct slist *);
111
112 static void find_levels_r(struct block *);
113
114 static void find_levels(struct block *);
115 static void find_dom(struct block *);
116 static void propedom(struct edge *);
117 static void find_edom(struct block *);
118 static void find_closure(struct block *);
119 static int atomuse(struct stmt *);
120 static int atomdef(struct stmt *);
121 static void compute_local_ud(struct block *);
122 static void find_ud(struct block *);
123 static void init_val(void);
124 static int F(int, int, int);
125 static __inline void vstore(struct stmt *, int *, int, int);
126 static void opt_blk(struct block *, int);
127 static int use_conflict(struct block *, struct block *);
128 static void opt_j(struct edge *);
129 static void or_pullup(struct block *);
130 static void and_pullup(struct block *);
131 static void opt_blks(struct block *, int);
132 static __inline void link_inedge(struct edge *, struct block *);
133 static void find_inedges(struct block *);
134 static void opt_root(struct block **);
135 static void opt_loop(struct block *, int);
136 static void fold_op(struct stmt *, int, int);
137 static __inline struct slist *this_op(struct slist *);
138 static void opt_not(struct block *);
139 static void opt_peep(struct block *);
140 static void opt_stmt(struct stmt *, int[], int);
141 static void deadstmt(struct stmt *, struct stmt *[]);
142 static void opt_deadstores(struct block *);
143 static void opt_blk(struct block *, int);
144 static int use_conflict(struct block *, struct block *);
145 static void opt_j(struct edge *);
146 static struct block *fold_edge(struct block *, struct edge *);
147 static __inline int eq_blk(struct block *, struct block *);
148 static int slength(struct slist *);
149 static int count_blocks(struct block *);
150 static void number_blks_r(struct block *);
151 static int count_stmts(struct block *);
152 static int convert_code_r(struct block *);
153 #ifdef BDEBUG
154 static void opt_dump(struct block *);
155 #endif
156
157 static int n_blocks;
158 struct block **blocks;
159 static int n_edges;
160 struct edge **edges;
161
162 /*
163 * A bit vector set representation of the dominators.
164 * We round up the set size to the next power of two.
165 */
166 static int nodewords;
167 static int edgewords;
168 struct block **levels;
169 bpf_u_int32 *space;
170 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
171 /*
172 * True if 'a' is in the uset 'p'.
173 */
174 #define SET_MEMBER(p, a) \
175 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
176
177 /*
178 * Add 'a' to uset p.
179 */
180 #define SET_INSERT(p, a) \
181 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
182
183 /*
184 * Delete 'a' from uset p.
185 */
186 #define SET_DELETE(p, a) \
187 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
188
189 /*
190 * a := a intersect b
191 */
192 #define SET_INTERSECT(a, b, n)\
193 {\
194 register bpf_u_int32 *_x = a, *_y = b;\
195 register int _n = n;\
196 while (--_n >= 0) *_x++ &= *_y++;\
197 }
198
199 /*
200 * a := a - b
201 */
202 #define SET_SUBTRACT(a, b, n)\
203 {\
204 register bpf_u_int32 *_x = a, *_y = b;\
205 register int _n = n;\
206 while (--_n >= 0) *_x++ &=~ *_y++;\
207 }
208
209 /*
210 * a := a union b
211 */
212 #define SET_UNION(a, b, n)\
213 {\
214 register bpf_u_int32 *_x = a, *_y = b;\
215 register int _n = n;\
216 while (--_n >= 0) *_x++ |= *_y++;\
217 }
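/*
 * A worked example of the uset layout, assuming 32-bit bpf_u_int32
 * (BITS_PER_WORD == 32): block id 37 lives in word 37/32 == 1,
 * bit 37%32 == 5, so
 *
 *	SET_INSERT(p, 37)	does	p[1] |= (1 << 5)
 *	SET_MEMBER(p, 37)	tests	p[1] &  (1 << 5)
 *
 * and SET_INTERSECT/SET_UNION/SET_SUBTRACT simply combine the two
 * arrays word by word.
 */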
218
219 static uset all_dom_sets;
220 static uset all_closure_sets;
221 static uset all_edge_sets;
222
223 #ifndef MAX
224 #define MAX(a,b) ((a)>(b)?(a):(b))
225 #endif
226
227 static void
228 find_levels_r(b)
229 struct block *b;
230 {
231 int level;
232
233 if (isMarked(b))
234 return;
235
236 Mark(b);
237 b->link = 0;
238
239 if (JT(b)) {
240 find_levels_r(JT(b));
241 find_levels_r(JF(b));
242 level = MAX(JT(b)->level, JF(b)->level) + 1;
243 } else
244 level = 0;
245 b->level = level;
246 b->link = levels[level];
247 levels[level] = b;
248 }
249
250 /*
251 * Level graph. The levels go from 0 at the leaves to
252 * N_LEVELS at the root. The levels[] array points to the
253 * first node of the level list, whose elements are linked
254 * with the 'link' field of the struct block.
255 */
256 static void
257 find_levels(root)
258 struct block *root;
259 {
260 memset((char *)levels, 0, n_blocks * sizeof(*levels));
261 unMarkAll();
262 find_levels_r(root);
263 }
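/*
 * For example, the return leaves always end up at level 0, a test
 * both of whose successors are leaves at level 1, and so on up to
 * the root, which carries the largest level; blocks sharing a level
 * are chained through their 'link' fields off the corresponding
 * levels[] entry.
 */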
264
265 /*
266 * Find dominator relationships.
267 * Assumes graph has been leveled.
268 */
269 static void
270 find_dom(root)
271 struct block *root;
272 {
273 int i;
274 struct block *b;
275 bpf_u_int32 *x;
276
277 /*
278 * Initialize sets to contain all nodes.
279 */
280 x = all_dom_sets;
281 i = n_blocks * nodewords;
282 while (--i >= 0)
283 *x++ = ~0;
284 /* Root starts off empty. */
285 for (i = nodewords; --i >= 0;)
286 root->dom[i] = 0;
287
288 /* root->level is the highest level number found. */
289 for (i = root->level; i >= 0; --i) {
290 for (b = levels[i]; b; b = b->link) {
291 SET_INSERT(b->dom, b->id);
292 if (JT(b) == 0)
293 continue;
294 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
295 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
296 }
297 }
298 }
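/*
 * Sketch of the computation on a diamond-shaped subgraph (the root's
 * two out-edges go to b1 and b2, and all of their out-edges go to
 * b3): every dom set starts out as "all blocks", the root's is
 * cleared and then holds just itself, b1 and b2 each end up with
 * {root, self}, and the two intersections arriving at b3 leave it
 * with {root, b3} only, since neither b1 nor b2 lies on every path
 * to b3.
 */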
299
300 static void
301 propedom(ep)
302 struct edge *ep;
303 {
304 SET_INSERT(ep->edom, ep->id);
305 if (ep->succ) {
306 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
307 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
308 }
309 }
310
311 /*
312 * Compute edge dominators.
313 * Assumes graph has been leveled and predecessors established.
314 */
315 static void
316 find_edom(root)
317 struct block *root;
318 {
319 int i;
320 uset x;
321 struct block *b;
322
323 x = all_edge_sets;
324 for (i = n_edges * edgewords; --i >= 0; )
325 x[i] = ~0;
326
327 /* root->level is the highest level number found. */
328 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
329 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
330 for (i = root->level; i >= 0; --i) {
331 for (b = levels[i]; b != 0; b = b->link) {
332 propedom(&b->et);
333 propedom(&b->ef);
334 }
335 }
336 }
337
338 /*
339 * Find the backwards transitive closure of the flow graph. These sets
340 * are backwards in the sense that we find the set of nodes that reach
341 * a given node, not the set of nodes that can be reached by a node.
342 *
343 * Assumes graph has been leveled.
344 */
345 static void
346 find_closure(root)
347 struct block *root;
348 {
349 int i;
350 struct block *b;
351
352 /*
353 * Initialize sets to contain no nodes.
354 */
355 memset((char *)all_closure_sets, 0,
356 n_blocks * nodewords * sizeof(*all_closure_sets));
357
358 /* root->level is the highest level number found. */
359 for (i = root->level; i >= 0; --i) {
360 for (b = levels[i]; b; b = b->link) {
361 SET_INSERT(b->closure, b->id);
362 if (JT(b) == 0)
363 continue;
364 SET_UNION(JT(b)->closure, b->closure, nodewords);
365 SET_UNION(JF(b)->closure, b->closure, nodewords);
366 }
367 }
368 }
369
370 /*
371 * Return the register number that is used by s. If A and X are both
372 * used, return AX_ATOM. If no register is used, return -1.
373 *
374 * The implementation should probably change to an array access.
375 */
376 static int
377 atomuse(s)
378 struct stmt *s;
379 {
380 register int c = s->code;
381
382 if (c == NOP)
383 return -1;
384
385 switch (BPF_CLASS(c)) {
386
387 case BPF_RET:
388 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
389 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
390
391 case BPF_LD:
392 case BPF_LDX:
393 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
394 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
395
396 case BPF_ST:
397 return A_ATOM;
398
399 case BPF_STX:
400 return X_ATOM;
401
402 case BPF_JMP:
403 case BPF_ALU:
404 if (BPF_SRC(c) == BPF_X)
405 return AX_ATOM;
406 return A_ATOM;
407
408 case BPF_MISC:
409 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
410 }
411 abort();
412 /* NOTREACHED */
413 }
414
415 /*
416 * Return the register number that is defined by 's'. We assume that
417 * a single stmt cannot define more than one register. If no register
418 * is defined, return -1.
419 *
420 * The implementation should probably change to an array access.
421 */
422 static int
423 atomdef(s)
424 struct stmt *s;
425 {
426 if (s->code == NOP)
427 return -1;
428
429 switch (BPF_CLASS(s->code)) {
430
431 case BPF_LD:
432 case BPF_ALU:
433 return A_ATOM;
434
435 case BPF_LDX:
436 return X_ATOM;
437
438 case BPF_ST:
439 case BPF_STX:
440 return s->k;
441
442 case BPF_MISC:
443 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
444 }
445 return -1;
446 }
447
448 static void
449 compute_local_ud(b)
450 struct block *b;
451 {
452 struct slist *s;
453 atomset def = 0, use = 0, kill = 0;
454 int atom;
455
456 for (s = b->stmts; s; s = s->next) {
457 if (s->s.code == NOP)
458 continue;
459 atom = atomuse(&s->s);
460 if (atom >= 0) {
461 if (atom == AX_ATOM) {
462 if (!ATOMELEM(def, X_ATOM))
463 use |= ATOMMASK(X_ATOM);
464 if (!ATOMELEM(def, A_ATOM))
465 use |= ATOMMASK(A_ATOM);
466 }
467 else if (atom < N_ATOMS) {
468 if (!ATOMELEM(def, atom))
469 use |= ATOMMASK(atom);
470 }
471 else
472 abort();
473 }
474 atom = atomdef(&s->s);
475 if (atom >= 0) {
476 if (!ATOMELEM(use, atom))
477 kill |= ATOMMASK(atom);
478 def |= ATOMMASK(atom);
479 }
480 }
481 if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
482 use |= ATOMMASK(A_ATOM);
483
484 b->def = def;
485 b->kill = kill;
486 b->in_use = use;
487 }
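/*
 * For a statement list like "ldx M[5]; st M[7]", for instance, cell 5
 * and the accumulator are read before being written (so they land in
 * 'use'), while X and cell 7 are written before any read (so they
 * land in both 'def' and 'kill'); a block ending in a conditional
 * jump additionally uses A, per the test above.
 */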
488
489 /*
490 * Assume graph is already leveled.
491 */
492 static void
493 find_ud(root)
494 struct block *root;
495 {
496 int i, maxlevel;
497 struct block *p;
498
499 /*
500 * root->level is the highest level number found;
501 * count down from there.
502 */
503 maxlevel = root->level;
504 for (i = maxlevel; i >= 0; --i)
505 for (p = levels[i]; p; p = p->link) {
506 compute_local_ud(p);
507 p->out_use = 0;
508 }
509
510 for (i = 1; i <= maxlevel; ++i) {
511 for (p = levels[i]; p; p = p->link) {
512 p->out_use |= JT(p)->in_use | JF(p)->in_use;
513 p->in_use |= p->out_use &~ p->kill;
514 }
515 }
516 }
517
518 /*
519 * These data structures are used in a Cocke and Schwartz style
520 * value numbering scheme. Since the flowgraph is acyclic,
521 * exit values can be propagated from a node's predecessors
522 * provided it is uniquely defined.
523 */
524 struct valnode {
525 int code;
526 int v0, v1;
527 int val;
528 struct valnode *next;
529 };
530
531 #define MODULUS 213
532 static struct valnode *hashtbl[MODULUS];
533 static int curval;
534 static int maxval;
535
536 /* Integer constants mapped with the load immediate opcode. */
537 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
538
539 struct vmapinfo {
540 int is_const;
541 bpf_int32 const_val;
542 };
543
544 struct vmapinfo *vmap;
545 struct valnode *vnode_base;
546 struct valnode *next_vnode;
547
548 static void
549 init_val()
550 {
551 curval = 0;
552 next_vnode = vnode_base;
553 memset((char *)vmap, 0, maxval * sizeof(*vmap));
554 memset((char *)hashtbl, 0, sizeof hashtbl);
555 }
556
557 /* Because we really don't have an IR, this stuff is a little messy. */
558 static int
559 F(code, v0, v1)
560 int code;
561 int v0, v1;
562 {
563 u_int hash;
564 int val;
565 struct valnode *p;
566
567 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
568 hash %= MODULUS;
569
570 for (p = hashtbl[hash]; p; p = p->next)
571 if (p->code == code && p->v0 == v0 && p->v1 == v1)
572 return p->val;
573
574 val = ++curval;
575 if (BPF_MODE(code) == BPF_IMM &&
576 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
577 vmap[val].const_val = v0;
578 vmap[val].is_const = 1;
579 }
580 p = next_vnode++;
581 p->val = val;
582 p->code = code;
583 p->v0 = v0;
584 p->v1 = v1;
585 p->next = hashtbl[hash];
586 hashtbl[hash] = p;
587
588 return val;
589 }
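/*
 * For example, two occurrences of K(2) hash to the same
 * (code, v0, v1) triple and therefore share one value number, while
 * K(3) gets a fresh one; for immediates, vmap[] also records the
 * constant itself, which is what opt_stmt() and opt_peep() later
 * test through vmap[v].is_const.
 */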
590
591 static __inline void
592 vstore(s, valp, newval, alter)
593 struct stmt *s;
594 int *valp;
595 int newval;
596 int alter;
597 {
598 if (alter && *valp == newval)
599 s->code = NOP;
600 else
601 *valp = newval;
602 }
603
604 static void
605 fold_op(s, v0, v1)
606 struct stmt *s;
607 int v0, v1;
608 {
609 bpf_int32 a, b;
610
611 a = vmap[v0].const_val;
612 b = vmap[v1].const_val;
613
614 switch (BPF_OP(s->code)) {
615 case BPF_ADD:
616 a += b;
617 break;
618
619 case BPF_SUB:
620 a -= b;
621 break;
622
623 case BPF_MUL:
624 a *= b;
625 break;
626
627 case BPF_DIV:
628 if (b == 0)
629 bpf_error("division by zero");
630 a /= b;
631 break;
632
633 case BPF_AND:
634 a &= b;
635 break;
636
637 case BPF_OR:
638 a |= b;
639 break;
640
641 case BPF_LSH:
642 a <<= b;
643 break;
644
645 case BPF_RSH:
646 a >>= b;
647 break;
648
649 case BPF_NEG:
650 a = -a;
651 break;
652
653 default:
654 abort();
655 }
656 s->k = a;
657 s->code = BPF_LD|BPF_IMM;
658 done = 0;
659 }
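/*
 * E.g. once value numbering knows the accumulator holds the constant
 * 2, a following "add #3" is folded here into "ld #5": the ALU
 * instruction is rewritten in place to BPF_LD|BPF_IMM with k == 5,
 * and the now-dead "ld #2" that produced the old accumulator value
 * is NOPped later by deadstmt() (assuming nothing read A in between).
 */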
660
661 static __inline struct slist *
662 this_op(s)
663 struct slist *s;
664 {
665 while (s != 0 && s->s.code == NOP)
666 s = s->next;
667 return s;
668 }
669
670 static void
671 opt_not(b)
672 struct block *b;
673 {
674 struct block *tmp = JT(b);
675
676 JT(b) = JF(b);
677 JF(b) = tmp;
678 }
679
680 static void
681 opt_peep(b)
682 struct block *b;
683 {
684 struct slist *s;
685 struct slist *next, *last;
686 int val;
687
688 s = b->stmts;
689 if (s == 0)
690 return;
691
692 last = s;
693 while (1) {
694 s = this_op(s);
695 if (s == 0)
696 break;
697 next = this_op(s->next);
698 if (next == 0)
699 break;
700 last = next;
701
702 /*
703 * st M[k] --> st M[k]
704 * ldx M[k] tax
705 */
706 if (s->s.code == BPF_ST &&
707 next->s.code == (BPF_LDX|BPF_MEM) &&
708 s->s.k == next->s.k) {
709 done = 0;
710 next->s.code = BPF_MISC|BPF_TAX;
711 }
712 /*
713 * ld #k --> ldx #k
714 * tax txa
715 */
716 if (s->s.code == (BPF_LD|BPF_IMM) &&
717 next->s.code == (BPF_MISC|BPF_TAX)) {
718 s->s.code = BPF_LDX|BPF_IMM;
719 next->s.code = BPF_MISC|BPF_TXA;
720 done = 0;
721 }
722 /*
723 * This is an ugly special case, but it happens
724 * when you say tcp[k] or udp[k] where k is a constant.
725 */
726 if (s->s.code == (BPF_LD|BPF_IMM)) {
727 struct slist *add, *tax, *ild;
728
729 /*
730 * Check that X isn't used on exit from this
731 * block (which the optimizer might cause).
732 * We know the code generator won't generate
733 * any local dependencies.
734 */
735 if (ATOMELEM(b->out_use, X_ATOM))
736 break;
737
738 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
739 add = next;
740 else
741 add = this_op(next->next);
742 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
743 break;
744
745 tax = this_op(add->next);
746 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
747 break;
748
749 ild = this_op(tax->next);
750 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
751 BPF_MODE(ild->s.code) != BPF_IND)
752 break;
753 /*
754 * XXX We need to check that X is not
755 * subsequently used. We know we can eliminate the
756 * accumulator modifications since it is defined
757 * by the last stmt of this sequence.
758 *
759 * We want to turn this sequence:
760 *
761 * (004) ldi #0x2 {s}
762 * (005) ldxms [14] {next} -- optional
763 * (006) addx {add}
764 * (007) tax {tax}
765 * (008) ild [x+0] {ild}
766 *
767 * into this sequence:
768 *
769 * (004) nop
770 * (005) ldxms [14]
771 * (006) nop
772 * (007) nop
773 * (008) ild [x+2]
774 *
775 */
776 ild->s.k += s->s.k;
777 s->s.code = NOP;
778 add->s.code = NOP;
779 tax->s.code = NOP;
780 done = 0;
781 }
782 s = next;
783 }
784 /*
785 * If we have a subtract to do a comparison, and the X register
786 * is a known constant, we can merge this value into the
787 * comparison.
788 */
789 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
790 !ATOMELEM(b->out_use, A_ATOM)) {
791 val = b->val[X_ATOM];
792 if (vmap[val].is_const) {
793 int op;
794
795 b->s.k += vmap[val].const_val;
796 op = BPF_OP(b->s.code);
797 if (op == BPF_JGT || op == BPF_JGE) {
798 struct block *t = JT(b);
799 JT(b) = JF(b);
800 JF(b) = t;
801 b->s.k += 0x80000000;
802 }
803 last->s.code = NOP;
804 done = 0;
805 } else if (b->s.k == 0) {
806 /*
807 * sub x -> nop
808 * j #0 j x
809 */
810 last->s.code = NOP;
811 b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
812 BPF_X;
813 done = 0;
814 }
815 }
816 /*
817 * Likewise, a constant subtract can be simplified.
818 */
819 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
820 !ATOMELEM(b->out_use, A_ATOM)) {
821 int op;
822
823 b->s.k += last->s.k;
824 last->s.code = NOP;
825 op = BPF_OP(b->s.code);
826 if (op == BPF_JGT || op == BPF_JGE) {
827 struct block *t = JT(b);
828 JT(b) = JF(b);
829 JF(b) = t;
830 b->s.k += 0x80000000;
831 }
832 done = 0;
833 }
834 /*
835 * and #k nop
836 * jeq #0 -> jset #k
837 */
838 if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
839 !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
840 b->s.k = last->s.k;
841 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
842 last->s.code = NOP;
843 done = 0;
844 opt_not(b);
845 }
846 /*
847 * If the accumulator is a known constant, we can compute the
848 * comparison result.
849 */
850 val = b->val[A_ATOM];
851 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
852 bpf_int32 v = vmap[val].const_val;
853 switch (BPF_OP(b->s.code)) {
854
855 case BPF_JEQ:
856 v = v == b->s.k;
857 break;
858
859 case BPF_JGT:
860 v = (unsigned)v > b->s.k;
861 break;
862
863 case BPF_JGE:
864 v = (unsigned)v >= b->s.k;
865 break;
866
867 case BPF_JSET:
868 v &= b->s.k;
869 break;
870
871 default:
872 abort();
873 }
874 if (JF(b) != JT(b))
875 done = 0;
876 if (v)
877 JF(b) = JT(b);
878 else
879 JT(b) = JF(b);
880 }
881 }
882
883 /*
884 * Compute the symbolic value of the expression 's', and update
885 * anything it defines in the value table 'val'. If 'alter' is true,
886 * do various optimizations. This code would be cleaner if symbolic
887 * evaluation and code transformations weren't folded together.
888 */
889 static void
890 opt_stmt(s, val, alter)
891 struct stmt *s;
892 int val[];
893 int alter;
894 {
895 int op;
896 int v;
897
898 switch (s->code) {
899
900 case BPF_LD|BPF_ABS|BPF_W:
901 case BPF_LD|BPF_ABS|BPF_H:
902 case BPF_LD|BPF_ABS|BPF_B:
903 v = F(s->code, s->k, 0L);
904 vstore(s, &val[A_ATOM], v, alter);
905 break;
906
907 case BPF_LD|BPF_IND|BPF_W:
908 case BPF_LD|BPF_IND|BPF_H:
909 case BPF_LD|BPF_IND|BPF_B:
910 v = val[X_ATOM];
911 if (alter && vmap[v].is_const) {
912 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
913 s->k += vmap[v].const_val;
914 v = F(s->code, s->k, 0L);
915 done = 0;
916 }
917 else
918 v = F(s->code, s->k, v);
919 vstore(s, &val[A_ATOM], v, alter);
920 break;
921
922 case BPF_LD|BPF_LEN:
923 v = F(s->code, 0L, 0L);
924 vstore(s, &val[A_ATOM], v, alter);
925 break;
926
927 case BPF_LD|BPF_IMM:
928 v = K(s->k);
929 vstore(s, &val[A_ATOM], v, alter);
930 break;
931
932 case BPF_LDX|BPF_IMM:
933 v = K(s->k);
934 vstore(s, &val[X_ATOM], v, alter);
935 break;
936
937 case BPF_LDX|BPF_MSH|BPF_B:
938 v = F(s->code, s->k, 0L);
939 vstore(s, &val[X_ATOM], v, alter);
940 break;
941
942 case BPF_ALU|BPF_NEG:
943 if (alter && vmap[val[A_ATOM]].is_const) {
944 s->code = BPF_LD|BPF_IMM;
945 s->k = -vmap[val[A_ATOM]].const_val;
946 val[A_ATOM] = K(s->k);
947 }
948 else
949 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
950 break;
951
952 case BPF_ALU|BPF_ADD|BPF_K:
953 case BPF_ALU|BPF_SUB|BPF_K:
954 case BPF_ALU|BPF_MUL|BPF_K:
955 case BPF_ALU|BPF_DIV|BPF_K:
956 case BPF_ALU|BPF_AND|BPF_K:
957 case BPF_ALU|BPF_OR|BPF_K:
958 case BPF_ALU|BPF_LSH|BPF_K:
959 case BPF_ALU|BPF_RSH|BPF_K:
960 op = BPF_OP(s->code);
961 if (alter) {
962 if (s->k == 0) {
963 if (op == BPF_ADD || op == BPF_SUB ||
964 op == BPF_LSH || op == BPF_RSH ||
965 op == BPF_OR) {
966 s->code = NOP;
967 break;
968 }
969 if (op == BPF_MUL || op == BPF_AND) {
970 s->code = BPF_LD|BPF_IMM;
971 val[A_ATOM] = K(s->k);
972 break;
973 }
974 }
975 if (vmap[val[A_ATOM]].is_const) {
976 fold_op(s, val[A_ATOM], K(s->k));
977 val[A_ATOM] = K(s->k);
978 break;
979 }
980 }
981 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
982 break;
983
984 case BPF_ALU|BPF_ADD|BPF_X:
985 case BPF_ALU|BPF_SUB|BPF_X:
986 case BPF_ALU|BPF_MUL|BPF_X:
987 case BPF_ALU|BPF_DIV|BPF_X:
988 case BPF_ALU|BPF_AND|BPF_X:
989 case BPF_ALU|BPF_OR|BPF_X:
990 case BPF_ALU|BPF_LSH|BPF_X:
991 case BPF_ALU|BPF_RSH|BPF_X:
992 op = BPF_OP(s->code);
993 if (alter && vmap[val[X_ATOM]].is_const) {
994 if (vmap[val[A_ATOM]].is_const) {
995 fold_op(s, val[A_ATOM], val[X_ATOM]);
996 val[A_ATOM] = K(s->k);
997 }
998 else {
999 s->code = BPF_ALU|BPF_K|op;
1000 s->k = vmap[val[X_ATOM]].const_val;
1001 done = 0;
1002 val[A_ATOM] =
1003 F(s->code, val[A_ATOM], K(s->k));
1004 }
1005 break;
1006 }
1007 /*
1008 * Check if we're doing something to an accumulator
1009 * that is 0, and simplify. This may not seem like
1010 * much of a simplification but it could open up further
1011 * optimizations.
1012 * XXX We could also check for mul by 1, and -1, etc.
1013 */
1014 if (alter && vmap[val[A_ATOM]].is_const
1015 && vmap[val[A_ATOM]].const_val == 0) {
1016 if (op == BPF_ADD || op == BPF_OR ||
1017 op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
1018 s->code = BPF_MISC|BPF_TXA;
1019 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1020 break;
1021 }
1022 else if (op == BPF_MUL || op == BPF_DIV ||
1023 op == BPF_AND) {
1024 s->code = BPF_LD|BPF_IMM;
1025 s->k = 0;
1026 vstore(s, &val[A_ATOM], K(s->k), alter);
1027 break;
1028 }
1029 else if (op == BPF_NEG) {
1030 s->code = NOP;
1031 break;
1032 }
1033 }
1034 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1035 break;
1036
1037 case BPF_MISC|BPF_TXA:
1038 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1039 break;
1040
1041 case BPF_LD|BPF_MEM:
1042 v = val[s->k];
1043 if (alter && vmap[v].is_const) {
1044 s->code = BPF_LD|BPF_IMM;
1045 s->k = vmap[v].const_val;
1046 done = 0;
1047 }
1048 vstore(s, &val[A_ATOM], v, alter);
1049 break;
1050
1051 case BPF_MISC|BPF_TAX:
1052 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1053 break;
1054
1055 case BPF_LDX|BPF_MEM:
1056 v = val[s->k];
1057 if (alter && vmap[v].is_const) {
1058 s->code = BPF_LDX|BPF_IMM;
1059 s->k = vmap[v].const_val;
1060 done = 0;
1061 }
1062 vstore(s, &val[X_ATOM], v, alter);
1063 break;
1064
1065 case BPF_ST:
1066 vstore(s, &val[s->k], val[A_ATOM], alter);
1067 break;
1068
1069 case BPF_STX:
1070 vstore(s, &val[s->k], val[X_ATOM], alter);
1071 break;
1072 }
1073 }
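/*
 * Two typical rewrites performed above when 'alter' is set: an
 * indirect load whose index register holds a known constant,
 *
 *	ldx #14			ldx #14
 *	ld [x + 40]	  -->	ld [54]
 *
 * and a load from a scratch cell whose value number is a known
 * constant, which turns "ld M[k]" into the corresponding "ld #n".
 */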
1074
1075 static void
1076 deadstmt(s, last)
1077 register struct stmt *s;
1078 register struct stmt *last[];
1079 {
1080 register int atom;
1081
1082 atom = atomuse(s);
1083 if (atom >= 0) {
1084 if (atom == AX_ATOM) {
1085 last[X_ATOM] = 0;
1086 last[A_ATOM] = 0;
1087 }
1088 else
1089 last[atom] = 0;
1090 }
1091 atom = atomdef(s);
1092 if (atom >= 0) {
1093 if (last[atom]) {
1094 done = 0;
1095 last[atom]->code = NOP;
1096 }
1097 last[atom] = s;
1098 }
1099 }
1100
1101 static void
1102 opt_deadstores(b)
1103 register struct block *b;
1104 {
1105 register struct slist *s;
1106 register int atom;
1107 struct stmt *last[N_ATOMS];
1108
1109 memset((char *)last, 0, sizeof last);
1110
1111 for (s = b->stmts; s != 0; s = s->next)
1112 deadstmt(&s->s, last);
1113 deadstmt(&b->s, last);
1114
1115 for (atom = 0; atom < N_ATOMS; ++atom)
1116 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1117 last[atom]->code = NOP;
1118 done = 0;
1119 }
1120 }
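/*
 * So a scratch cell that is written twice with no intervening read
 * ("st M[3]; ...; st M[3]") loses its first store in deadstmt(), and
 * a final definition of any atom that is not in b->out_use is NOPped
 * here, since no successor block will ever read it.
 */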
1121
1122 static void
1123 opt_blk(b, do_stmts)
1124 struct block *b;
1125 int do_stmts;
1126 {
1127 struct slist *s;
1128 struct edge *p;
1129 int i;
1130 bpf_int32 aval;
1131
1132 /*
1133 * Initialize the atom values.
1134 * If we have no predecessors, everything is undefined.
1135 * Otherwise, we inherit our values from our predecessors.
1136 * If any register has an ambiguous value (i.e. control paths are
1137 * merging) give it the undefined value of 0.
1138 */
1139 p = b->in_edges;
1140 if (p == 0)
1141 memset((char *)b->val, 0, sizeof(b->val));
1142 else {
1143 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1144 while ((p = p->next) != NULL) {
1145 for (i = 0; i < N_ATOMS; ++i)
1146 if (b->val[i] != p->pred->val[i])
1147 b->val[i] = 0;
1148 }
1149 }
1150 aval = b->val[A_ATOM];
1151 for (s = b->stmts; s; s = s->next)
1152 opt_stmt(&s->s, b->val, do_stmts);
1153
1154 /*
1155 * This is a special case: if we don't use anything from this
1156 * block, and we load the accumulator with a value that is
1157 * already there, or if this block is a return,
1158 * eliminate all the statements.
1159 */
1160 if (do_stmts &&
1161 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
1162 BPF_CLASS(b->s.code) == BPF_RET)) {
1163 if (b->stmts != 0) {
1164 b->stmts = 0;
1165 done = 0;
1166 }
1167 } else {
1168 opt_peep(b);
1169 opt_deadstores(b);
1170 }
1171 /*
1172 * Set up values for branch optimizer.
1173 */
1174 if (BPF_SRC(b->s.code) == BPF_K)
1175 b->oval = K(b->s.k);
1176 else
1177 b->oval = b->val[X_ATOM];
1178 b->et.code = b->s.code;
1179 b->ef.code = -b->s.code;
1180 }
1181
1182 /*
1183 * Return true if any register that is used on exit from 'succ', has
1184 * an exit value that is different from the corresponding exit value
1185 * from 'b'.
1186 */
1187 static int
1188 use_conflict(b, succ)
1189 struct block *b, *succ;
1190 {
1191 int atom;
1192 atomset use = succ->out_use;
1193
1194 if (use == 0)
1195 return 0;
1196
1197 for (atom = 0; atom < N_ATOMS; ++atom)
1198 if (ATOMELEM(use, atom))
1199 if (b->val[atom] != succ->val[atom])
1200 return 1;
1201 return 0;
1202 }
1203
1204 static struct block *
1205 fold_edge(child, ep)
1206 struct block *child;
1207 struct edge *ep;
1208 {
1209 int sense;
1210 int aval0, aval1, oval0, oval1;
1211 int code = ep->code;
1212
1213 if (code < 0) {
1214 code = -code;
1215 sense = 0;
1216 } else
1217 sense = 1;
1218
1219 if (child->s.code != code)
1220 return 0;
1221
1222 aval0 = child->val[A_ATOM];
1223 oval0 = child->oval;
1224 aval1 = ep->pred->val[A_ATOM];
1225 oval1 = ep->pred->oval;
1226
1227 if (aval0 != aval1)
1228 return 0;
1229
1230 if (oval0 == oval1)
1231 /*
1232 * The operands are identical, so the
1233 * result is true if a true branch was
1234 * taken to get here, otherwise false.
1235 */
1236 return sense ? JT(child) : JF(child);
1237
1238 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1239 /*
1240 * At this point, we only know the comparison if we
1241 * came down the true branch, and it was an equality
1242 * comparison with a constant. We rely on the fact that
1243 * distinct constants have distinct value numbers.
1244 */
1245 return JF(child);
1246
1247 return 0;
1248 }
1249
1250 static void
1251 opt_j(ep)
1252 struct edge *ep;
1253 {
1254 register int i, k;
1255 register struct block *target;
1256
1257 if (JT(ep->succ) == 0)
1258 return;
1259
1260 if (JT(ep->succ) == JF(ep->succ)) {
1261 /*
1262 * Common branch targets can be eliminated, provided
1263 * there is no data dependency.
1264 */
1265 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1266 done = 0;
1267 ep->succ = JT(ep->succ);
1268 }
1269 }
1270 /*
1271 * For each edge dominator that matches the successor of this
1272 * edge, promote the edge successor to its grandchild.
1273 *
1274 * XXX We violate the set abstraction here in favor of a reasonably
1275 * efficient loop.
1276 */
1277 top:
1278 for (i = 0; i < edgewords; ++i) {
1279 register bpf_u_int32 x = ep->edom[i];
1280
1281 while (x != 0) {
1282 k = ffs(x) - 1;
1283 x &=~ (1 << k);
1284 k += i * BITS_PER_WORD;
1285
1286 target = fold_edge(ep->succ, edges[k]);
1287 /*
1288 * Check that there is no data dependency between
1289 * nodes that will be violated if we move the edge.
1290 */
1291 if (target != 0 && !use_conflict(ep->pred, target)) {
1292 done = 0;
1293 ep->succ = target;
1294 if (JT(target) != 0)
1295 /*
1296 * Start over unless we hit a leaf.
1297 */
1298 goto top;
1299 return;
1300 }
1301 }
1302 }
1303 }
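/*
 * For instance, with a filter like "tcp or udp" the ethertype test
 * "jeq #0x800" is generated twice; the edge reaching the second copy
 * from the failed tcp-protocol test is dominated by the true edge of
 * the first copy and compares the same value number against the same
 * constant, so fold_edge() can predict its outcome and opt_j()
 * retargets the edge straight to the second copy's true successor,
 * skipping the redundant test.
 */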
1304
1305
1306 static void
1307 or_pullup(b)
1308 struct block *b;
1309 {
1310 int val, at_top;
1311 struct block *pull;
1312 struct block **diffp, **samep;
1313 struct edge *ep;
1314
1315 ep = b->in_edges;
1316 if (ep == 0)
1317 return;
1318
1319 /*
1320 * Make sure each predecessor loads the same value.
1321 * XXX why?
1322 */
1323 val = ep->pred->val[A_ATOM];
1324 for (ep = ep->next; ep != 0; ep = ep->next)
1325 if (val != ep->pred->val[A_ATOM])
1326 return;
1327
1328 if (JT(b->in_edges->pred) == b)
1329 diffp = &JT(b->in_edges->pred);
1330 else
1331 diffp = &JF(b->in_edges->pred);
1332
1333 at_top = 1;
1334 while (1) {
1335 if (*diffp == 0)
1336 return;
1337
1338 if (JT(*diffp) != JT(b))
1339 return;
1340
1341 if (!SET_MEMBER((*diffp)->dom, b->id))
1342 return;
1343
1344 if ((*diffp)->val[A_ATOM] != val)
1345 break;
1346
1347 diffp = &JF(*diffp);
1348 at_top = 0;
1349 }
1350 samep = &JF(*diffp);
1351 while (1) {
1352 if (*samep == 0)
1353 return;
1354
1355 if (JT(*samep) != JT(b))
1356 return;
1357
1358 if (!SET_MEMBER((*samep)->dom, b->id))
1359 return;
1360
1361 if ((*samep)->val[A_ATOM] == val)
1362 break;
1363
1364 /* XXX Need to check that there are no data dependencies
1365 between diffp and samep. Currently, the code generator
1366 will not produce such dependencies. */
1367 samep = &JF(*samep);
1368 }
1369 #ifdef notdef
1370 /* XXX This doesn't cover everything. */
1371 for (i = 0; i < N_ATOMS; ++i)
1372 if ((*samep)->val[i] != pred->val[i])
1373 return;
1374 #endif
1375 /* Pull up the node. */
1376 pull = *samep;
1377 *samep = JF(pull);
1378 JF(pull) = *diffp;
1379
1380 /*
1381 * At the top of the chain, each predecessor needs to point at the
1382 * pulled up node. Inside the chain, there is only one predecessor
1383 * to worry about.
1384 */
1385 if (at_top) {
1386 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1387 if (JT(ep->pred) == b)
1388 JT(ep->pred) = pull;
1389 else
1390 JF(ep->pred) = pull;
1391 }
1392 }
1393 else
1394 *diffp = pull;
1395
1396 done = 0;
1397 }
1398
1399 static void
1400 and_pullup(b)
1401 struct block *b;
1402 {
1403 int val, at_top;
1404 struct block *pull;
1405 struct block **diffp, **samep;
1406 struct edge *ep;
1407
1408 ep = b->in_edges;
1409 if (ep == 0)
1410 return;
1411
1412 /*
1413 * Make sure each predecessor loads the same value.
1414 */
1415 val = ep->pred->val[A_ATOM];
1416 for (ep = ep->next; ep != 0; ep = ep->next)
1417 if (val != ep->pred->val[A_ATOM])
1418 return;
1419
1420 if (JT(b->in_edges->pred) == b)
1421 diffp = &JT(b->in_edges->pred);
1422 else
1423 diffp = &JF(b->in_edges->pred);
1424
1425 at_top = 1;
1426 while (1) {
1427 if (*diffp == 0)
1428 return;
1429
1430 if (JF(*diffp) != JF(b))
1431 return;
1432
1433 if (!SET_MEMBER((*diffp)->dom, b->id))
1434 return;
1435
1436 if ((*diffp)->val[A_ATOM] != val)
1437 break;
1438
1439 diffp = &JT(*diffp);
1440 at_top = 0;
1441 }
1442 samep = &JT(*diffp);
1443 while (1) {
1444 if (*samep == 0)
1445 return;
1446
1447 if (JF(*samep) != JF(b))
1448 return;
1449
1450 if (!SET_MEMBER((*samep)->dom, b->id))
1451 return;
1452
1453 if ((*samep)->val[A_ATOM] == val)
1454 break;
1455
1456 /* XXX Need to check that there are no data dependencies
1457 between diffp and samep. Currently, the code generator
1458 will not produce such dependencies. */
1459 samep = &JT(*samep);
1460 }
1461 #ifdef notdef
1462 /* XXX This doesn't cover everything. */
1463 for (i = 0; i < N_ATOMS; ++i)
1464 if ((*samep)->val[i] != pred->val[i])
1465 return;
1466 #endif
1467 /* Pull up the node. */
1468 pull = *samep;
1469 *samep = JT(pull);
1470 JT(pull) = *diffp;
1471
1472 /*
1473 * At the top of the chain, each predecessor needs to point at the
1474 * pulled up node. Inside the chain, there is only one predecessor
1475 * to worry about.
1476 */
1477 if (at_top) {
1478 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1479 if (JT(ep->pred) == b)
1480 JT(ep->pred) = pull;
1481 else
1482 JF(ep->pred) = pull;
1483 }
1484 }
1485 else
1486 *diffp = pull;
1487
1488 done = 0;
1489 }
1490
1491 static void
1492 opt_blks(root, do_stmts)
1493 struct block *root;
1494 int do_stmts;
1495 {
1496 int i, maxlevel;
1497 struct block *p;
1498
1499 init_val();
1500 maxlevel = root->level;
1501 for (i = maxlevel; i >= 0; --i)
1502 for (p = levels[i]; p; p = p->link)
1503 opt_blk(p, do_stmts);
1504
1505 if (do_stmts)
1506 /*
1507 * No point trying to move branches; it can't possibly
1508 * make a difference at this point.
1509 */
1510 return;
1511
1512 for (i = 1; i <= maxlevel; ++i) {
1513 for (p = levels[i]; p; p = p->link) {
1514 opt_j(&p->et);
1515 opt_j(&p->ef);
1516 }
1517 }
1518 for (i = 1; i <= maxlevel; ++i) {
1519 for (p = levels[i]; p; p = p->link) {
1520 or_pullup(p);
1521 and_pullup(p);
1522 }
1523 }
1524 }
1525
1526 static __inline void
1527 link_inedge(parent, child)
1528 struct edge *parent;
1529 struct block *child;
1530 {
1531 parent->next = child->in_edges;
1532 child->in_edges = parent;
1533 }
1534
1535 static void
1536 find_inedges(root)
1537 struct block *root;
1538 {
1539 int i;
1540 struct block *b;
1541
1542 for (i = 0; i < n_blocks; ++i)
1543 blocks[i]->in_edges = 0;
1544
1545 /*
1546 * Traverse the graph, adding each edge to the predecessor
1547 * list of its successors. Skip the leaves (i.e. level 0).
1548 */
1549 for (i = root->level; i > 0; --i) {
1550 for (b = levels[i]; b != 0; b = b->link) {
1551 link_inedge(&b->et, JT(b));
1552 link_inedge(&b->ef, JF(b));
1553 }
1554 }
1555 }
1556
1557 static void
1558 opt_root(b)
1559 struct block **b;
1560 {
1561 struct slist *tmp, *s;
1562
1563 s = (*b)->stmts;
1564 (*b)->stmts = 0;
1565 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1566 *b = JT(*b);
1567
1568 tmp = (*b)->stmts;
1569 if (tmp != 0)
1570 sappend(s, tmp);
1571 (*b)->stmts = s;
1572
1573 /*
1574 * If the root node is a return, then there is no
1575 * point executing any statements (since the bpf machine
1576 * has no side effects).
1577 */
1578 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1579 (*b)->stmts = 0;
1580 }
1581
1582 static void
1583 opt_loop(root, do_stmts)
1584 struct block *root;
1585 int do_stmts;
1586 {
1587
1588 #ifdef BDEBUG
1589 if (dflag > 1)
1590 opt_dump(root);
1591 #endif
1592 do {
1593 done = 1;
1594 find_levels(root);
1595 find_dom(root);
1596 find_closure(root);
1597 find_inedges(root);
1598 find_ud(root);
1599 find_edom(root);
1600 opt_blks(root, do_stmts);
1601 #ifdef BDEBUG
1602 if (dflag > 1)
1603 opt_dump(root);
1604 #endif
1605 } while (!done);
1606 }
1607
1608 /*
1609 * Optimize the filter code in its dag representation.
1610 */
1611 void
1612 bpf_optimize(rootp)
1613 struct block **rootp;
1614 {
1615 struct block *root;
1616
1617 root = *rootp;
1618
1619 opt_init(root);
1620 opt_loop(root, 0);
1621 opt_loop(root, 1);
1622 intern_blocks(root);
1623 opt_root(rootp);
1624 opt_cleanup();
1625 }
1626
1627 static void
1628 make_marks(p)
1629 struct block *p;
1630 {
1631 if (!isMarked(p)) {
1632 Mark(p);
1633 if (BPF_CLASS(p->s.code) != BPF_RET) {
1634 make_marks(JT(p));
1635 make_marks(JF(p));
1636 }
1637 }
1638 }
1639
1640 /*
1641 * Mark code array such that isMarked(i) is true
1642 * only for nodes that are alive.
1643 */
1644 static void
1645 mark_code(p)
1646 struct block *p;
1647 {
1648 cur_mark += 1;
1649 make_marks(p);
1650 }
1651
1652 /*
1653 * True iff the two stmt lists load the same value from the packet into
1654 * the accumulator.
1655 */
1656 static int
1657 eq_slist(x, y)
1658 struct slist *x, *y;
1659 {
1660 while (1) {
1661 while (x && x->s.code == NOP)
1662 x = x->next;
1663 while (y && y->s.code == NOP)
1664 y = y->next;
1665 if (x == 0)
1666 return y == 0;
1667 if (y == 0)
1668 return x == 0;
1669 if (x->s.code != y->s.code || x->s.k != y->s.k)
1670 return 0;
1671 x = x->next;
1672 y = y->next;
1673 }
1674 }
1675
1676 static __inline int
1677 eq_blk(b0, b1)
1678 struct block *b0, *b1;
1679 {
1680 if (b0->s.code == b1->s.code &&
1681 b0->s.k == b1->s.k &&
1682 b0->et.succ == b1->et.succ &&
1683 b0->ef.succ == b1->ef.succ)
1684 return eq_slist(b0->stmts, b1->stmts);
1685 return 0;
1686 }
1687
1688 static void
1689 intern_blocks(root)
1690 struct block *root;
1691 {
1692 struct block *p;
1693 int i, j;
1694 int done;
1695 top:
1696 done = 1;
1697 for (i = 0; i < n_blocks; ++i)
1698 blocks[i]->link = 0;
1699
1700 mark_code(root);
1701
1702 for (i = n_blocks - 1; --i >= 0; ) {
1703 if (!isMarked(blocks[i]))
1704 continue;
1705 for (j = i + 1; j < n_blocks; ++j) {
1706 if (!isMarked(blocks[j]))
1707 continue;
1708 if (eq_blk(blocks[i], blocks[j])) {
1709 blocks[i]->link = blocks[j]->link ?
1710 blocks[j]->link : blocks[j];
1711 break;
1712 }
1713 }
1714 }
1715 for (i = 0; i < n_blocks; ++i) {
1716 p = blocks[i];
1717 if (JT(p) == 0)
1718 continue;
1719 if (JT(p)->link) {
1720 done = 0;
1721 JT(p) = JT(p)->link;
1722 }
1723 if (JF(p)->link) {
1724 done = 0;
1725 JF(p) = JF(p)->link;
1726 }
1727 }
1728 if (!done)
1729 goto top;
1730 }
1731
1732 static void
1733 opt_cleanup()
1734 {
1735 free((void *)vnode_base);
1736 free((void *)vmap);
1737 free((void *)edges);
1738 free((void *)space);
1739 free((void *)levels);
1740 free((void *)blocks);
1741 }
1742
1743 /*
1744 * Return the number of stmts in 's'.
1745 */
1746 static int
1747 slength(s)
1748 struct slist *s;
1749 {
1750 int n = 0;
1751
1752 for (; s; s = s->next)
1753 if (s->s.code != NOP)
1754 ++n;
1755 return n;
1756 }
1757
1758 /*
1759 * Return the number of nodes reachable by 'p'.
1760 * All nodes should be initially unmarked.
1761 */
1762 static int
1763 count_blocks(p)
1764 struct block *p;
1765 {
1766 if (p == 0 || isMarked(p))
1767 return 0;
1768 Mark(p);
1769 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1770 }
1771
1772 /*
1773 * Do a depth first search on the flow graph, numbering the
1774 * basic blocks, and entering them into the 'blocks' array.
1775 */
1776 static void
1777 number_blks_r(p)
1778 struct block *p;
1779 {
1780 int n;
1781
1782 if (p == 0 || isMarked(p))
1783 return;
1784
1785 Mark(p);
1786 n = n_blocks++;
1787 p->id = n;
1788 blocks[n] = p;
1789
1790 number_blks_r(JT(p));
1791 number_blks_r(JF(p));
1792 }
1793
1794 /*
1795 * Return the number of stmts in the flowgraph reachable by 'p'.
1796 * The nodes should be unmarked before calling.
1797 */
1798 static int
1799 count_stmts(p)
1800 struct block *p;
1801 {
1802 int n;
1803
1804 if (p == 0 || isMarked(p))
1805 return 0;
1806 Mark(p);
1807 n = count_stmts(JT(p)) + count_stmts(JF(p));
1808 return slength(p->stmts) + n + 1;
1809 }
1810
1811 /*
1812 * Allocate memory. All allocation is done before optimization
1813 * is begun. A linear bound on the size of all data structures is computed
1814 * from the total number of blocks and/or statements.
1815 */
1816 static void
1817 opt_init(root)
1818 struct block *root;
1819 {
1820 bpf_u_int32 *p;
1821 int i, n, max_stmts;
1822
1823 /*
1824 * First, count the blocks, so we can malloc an array to map
1825 * block number to block. Then, put the blocks into the array.
1826 */
1827 unMarkAll();
1828 n = count_blocks(root);
1829 blocks = (struct block **)malloc(n * sizeof(*blocks));
1830 unMarkAll();
1831 n_blocks = 0;
1832 number_blks_r(root);
1833
1834 n_edges = 2 * n_blocks;
1835 edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1836
1837 /*
1838 * The number of levels is bounded by the number of nodes.
1839 */
1840 levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1841
1842 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1843 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1844
1845 /* XXX */
1846 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1847 + n_edges * edgewords * sizeof(*space));
1848 p = space;
1849 all_dom_sets = p;
1850 for (i = 0; i < n; ++i) {
1851 blocks[i]->dom = p;
1852 p += nodewords;
1853 }
1854 all_closure_sets = p;
1855 for (i = 0; i < n; ++i) {
1856 blocks[i]->closure = p;
1857 p += nodewords;
1858 }
1859 all_edge_sets = p;
1860 for (i = 0; i < n; ++i) {
1861 register struct block *b = blocks[i];
1862
1863 b->et.edom = p;
1864 p += edgewords;
1865 b->ef.edom = p;
1866 p += edgewords;
1867 b->et.id = i;
1868 edges[i] = &b->et;
1869 b->ef.id = n_blocks + i;
1870 edges[n_blocks + i] = &b->ef;
1871 b->et.pred = b;
1872 b->ef.pred = b;
1873 }
1874 max_stmts = 0;
1875 for (i = 0; i < n; ++i)
1876 max_stmts += slength(blocks[i]->stmts) + 1;
1877 /*
1878 * We allocate at most 3 value numbers per statement,
1879 * so this is an upper bound on the number of valnodes
1880 * we'll need.
1881 */
1882 maxval = 3 * max_stmts;
1883 vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1884 vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1885 }
1886
1887 /*
1888 * Some pointers used to convert the basic block form of the code
1889 * into the array form that BPF requires. 'fstart' will point to
1890 * the malloc'd array while 'ftail' is used during the recursive traversal.
1891 */
1892 static struct bpf_insn *fstart;
1893 static struct bpf_insn *ftail;
1894
1895 #ifdef BDEBUG
1896 int bids[1000];
1897 #endif
1898
1899 /*
1900 * Returns true if successful. Returns false if a branch has
1901 * an offset that is too large. If so, we have marked that
1902 * branch so that on a subsequent iteration, it will be treated
1903 * properly.
1904 */
1905 static int
1906 convert_code_r(p)
1907 struct block *p;
1908 {
1909 struct bpf_insn *dst;
1910 struct slist *src;
1911 int slen;
1912 u_int off;
1913 int extrajmps; /* number of extra jumps inserted */
1914
1915 if (p == 0 || isMarked(p))
1916 return (1);
1917 Mark(p);
1918
1919 if (convert_code_r(JF(p)) == 0)
1920 return (0);
1921 if (convert_code_r(JT(p)) == 0)
1922 return (0);
1923
1924 slen = slength(p->stmts);
1925 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1926 /* inflate length by any extra jumps */
1927
1928 p->offset = dst - fstart;
1929
1930 for (src = p->stmts; src; src = src->next) {
1931 if (src->s.code == NOP)
1932 continue;
1933 dst->code = (u_short)src->s.code;
1934 dst->k = src->s.k;
1935 ++dst;
1936 }
1937 #ifdef BDEBUG
1938 bids[dst - fstart] = p->id + 1;
1939 #endif
1940 dst->code = (u_short)p->s.code;
1941 dst->k = p->s.k;
1942 if (JT(p)) {
1943 extrajmps = 0;
1944 off = JT(p)->offset - (p->offset + slen) - 1;
1945 if (off >= 256) {
1946 /* offset too large for branch, must add a jump */
1947 if (p->longjt == 0) {
1948 /* mark this instruction and retry */
1949 p->longjt++;
1950 return(0);
1951 }
1952 /* branch if T to following jump */
1953 dst->jt = extrajmps;
1954 extrajmps++;
1955 dst[extrajmps].code = BPF_JMP|BPF_JA;
1956 dst[extrajmps].k = off - extrajmps;
1957 }
1958 else
1959 dst->jt = off;
1960 off = JF(p)->offset - (p->offset + slen) - 1;
1961 if (off >= 256) {
1962 /* offset too large for branch, must add a jump */
1963 if (p->longjf == 0) {
1964 /* mark this instruction and retry */
1965 p->longjf++;
1966 return(0);
1967 }
1968 /* branch if F to following jump */
1969 /* if two jumps are inserted, F goes to second one */
1970 dst->jf = extrajmps;
1971 extrajmps++;
1972 dst[extrajmps].code = BPF_JMP|BPF_JA;
1973 dst[extrajmps].k = off - extrajmps;
1974 }
1975 else
1976 dst->jf = off;
1977 }
1978 return (1);
1979 }
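/*
 * The jt/jf fields of a bpf_insn are only 8 bits wide, hence the
 * off >= 256 handling above: the conditional branch is pointed at an
 * unconditional BPF_JA inserted immediately after it, whose 32-bit
 * 'k' carries the real displacement, and longjt/longjf remember that
 * the extra slot must be reserved on the next conversion attempt.
 */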
1980
1981
1982 /*
1983 * Convert flowgraph intermediate representation to the
1984 * BPF array representation. Set *lenp to the number of instructions.
1985 */
1986 struct bpf_insn *
1987 icode_to_fcode(root, lenp)
1988 struct block *root;
1989 int *lenp;
1990 {
1991 int n;
1992 struct bpf_insn *fp;
1993
1994 /*
1995 * Loop doing convert_code_r() until no branches remain
1996 * with too-large offsets.
1997 */
1998 while (1) {
1999 unMarkAll();
2000 n = *lenp = count_stmts(root);
2001
2002 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2003 memset((char *)fp, 0, sizeof(*fp) * n);
2004 fstart = fp;
2005 ftail = fp + n;
2006
2007 unMarkAll();
2008 if (convert_code_r(root))
2009 break;
2010 free(fp);
2011 }
2012
2013 return fp;
2014 }
2015
2016 #ifdef BDEBUG
2017 static void
2018 opt_dump(root)
2019 struct block *root;
2020 {
2021 struct bpf_program f;
2022
2023 memset(bids, 0, sizeof bids);
2024 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2025 bpf_dump(&f, 1);
2026 putchar('\n');
2027 free((char *)f.bf_insns);
2028 }
2029 #endif