]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/net_filter.c
xnu-344.23.tar.gz
[apple/xnu.git] / osfmk / ppc / net_filter.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 *
25 */
26
27
28 #if NET_FILTER_COMPILER
29
30
/* Set non-zero to let the compiler spill into callee-saves registers.
   That path has not been written for PowerPC (see the #error below). */
#define USE_EXTRA_REGS 0

/* Fixed register roles inside generated filter code. */
#define REG_ZERO     0  /* Held at zero for the whole filter. */
#define REG_DATAADDR 3  /* Packet data address on entry; also the filter's
                           return register. */
#define REG_DATALEN  4  /* Packet data length, counted in two-byte units. */
#define REG_HDRADDR  5  /* Header data address. */
#define REG_RET      3  /* Return value goes here. */

/* We work in machine register numbers throughout rather than in indexes
   into this table: an earlier design used virtual register numbers and
   converted on emission, but the conversions were too numerous.  The price
   is that regs[] has slots that are never used. */
const unsigned char scratchregs[] = {
    6, 7, 8, 9, 10, 11, 12,
#if USE_EXTRA_REGS      /* Callee-saves regs available if we save them. */
#define INITIAL_NSCRATCHREGS 8  /* Number of registers above. */
#error not yet written
#endif
};
#define NSCRATCHREGS (sizeof(scratchregs) / sizeof(scratchregs[0]))
#define NREGS        32
#define NO_REG       1  /* Flag value: r1 is the stack pointer, so it can
                           never be handed out as a scratch register. */

#define MAX_LI 0x7fff   /* Max unsigned value in an LI. */
56
/* Branch encodings.
   A `cond' value is the BO field (bits 21-25) combined with the BI field
   (bits 16-20), as built by COND().  BI selects one bit of the condition
   register: field `crf', bit `bit' within that field. */
#define COND_IF_FALSE   0x04
#define COND_IF_TRUE    0x0c
#define COND_IF_ALWAYS  0x14

#define BIT_LT          0
#define BIT_GT          1
#define BIT_EQ          2
#define COND_BIT(crf, bit) \
    ((crf) * 4 + (bit))

#define COND(BO, BI)    (((BO) << (16 + 5)) | ((BI) << 16))
#define COND_ALWAYS     COND(COND_IF_ALWAYS, 0)
#define COND_EQ         COND(COND_IF_TRUE, COND_BIT(0, BIT_EQ))
#define COND_NE         COND(COND_IF_FALSE, COND_BIT(0, BIT_EQ))
#define COND_LE         COND(COND_IF_FALSE, COND_BIT(0, BIT_GT))
#define COND_GE         COND(COND_IF_FALSE, COND_BIT(0, BIT_LT))

/* Conditional branch to the link register, and the unconditional return. */
#define BCLR(cond)      ((19 << 26) | (cond) | (16 << 1))
#define BLR()           BCLR(COND_ALWAYS)

/* Conditional relative branch; `off' is a displacement in instructions.
   The displacement is converted to unsigned and masked to the 14-bit BD
   field before shifting.  This keeps a negative displacement (which the
   sizing pass does produce, since it branches relative to junk_filter)
   from being an undefined left shift of a negative value and from
   spilling into the BI/BO/opcode bits. */
#define BC(cond, off) \
    ((16 << 26) | (cond) | (int) (((unsigned) (off) & 0x3fffu) << 2))
74
/* Immediate-form instructions.  The two register fields swap roles between
   instruction families: arithmetic forms put the destination in field `a'
   and the source in field `b', whereas logical forms put the source in `a'
   and the destination in `b'.  The wrappers below hide that, so callers
   always write (dst, src, imm). */
#define IMMED(opcode, a, b, imm) \
    (((opcode) << 26) | ((a) << 21) | ((b) << 16) | ((imm) & 0xffff))

/* Arithmetic: destination first. */
#define ADDI(dst, src, imm)     IMMED(14, dst, src, imm)
#define ADDIC(dst, src, imm)    IMMED(12, dst, src, imm)
#define SUBFIC(dst, src, imm)   IMMED(8, dst, src, imm)
#define LI(dst, imm)            ADDI(dst, 0, (imm))

/* Logical: source first in the encoding. */
#define ANDI(dst, src, imm)     IMMED(28, src, dst, imm)
#define ORI(dst, src, imm)      IMMED(24, src, dst, imm)
#define XORI(dst, src, imm)     IMMED(26, src, dst, imm)

/* Unsigned (logical) compares, register-register and register-immediate. */
#define CMPL(lhs, rhs) \
    ((31 << 26) | ((lhs) << 16) | ((rhs) << 11) | (32 << 1))
#define CMPLI(lhs, imm) \
    ((10 << 26) | ((lhs) << 16) | ((imm) & 0xffff))
96
/* Register-register instructions under primary opcode 31: three 5-bit
   register fields followed by an extended opcode. */
#define INTEGER_OP(opcode, a, b, c) \
    ((31 << 26) | ((a) << 21) | ((b) << 16) | \
     ((c) << 11) | ((opcode) << 1))

/* Extended opcodes. */
#define OP_ADD          266
#define OP_ADDE         138
#define OP_AND          28
#define OP_OR           444
#define OP_SRW          536
#define OP_SUBF         40
#define OP_SUBFC        8
#define OP_SUBFE        136
#define OP_XOR          316

/* Arithmetic forms: the destination is the first register field.  Note the
   subtract-from forms compute rhs - lhs. */
#define ARITH_OP(opcode, dst, lhs, rhs) \
    INTEGER_OP(opcode, dst, lhs, rhs)
#define ADD(dst, lhs, rhs) \
    ARITH_OP(OP_ADD, dst, lhs, rhs)
#define ADDE(dst, lhs, rhs) \
    ARITH_OP(OP_ADDE, dst, lhs, rhs)
#define SUBF(dst, lhs, rhs) \
    ARITH_OP(OP_SUBF, dst, lhs, rhs)
#define SUBFC(dst, lhs, rhs) \
    ARITH_OP(OP_SUBFC, dst, lhs, rhs)
#define SUBFE(dst, lhs, rhs) \
    ARITH_OP(OP_SUBFE, dst, lhs, rhs)

/* Logical forms: the destination is the second register field. */
#define LOGIC_OP(opcode, dst, lhs, rhs) \
    INTEGER_OP(opcode, lhs, dst, rhs)
#define OR(dst, lhs, rhs) \
    LOGIC_OP(OP_OR, dst, lhs, rhs)
#define XOR(dst, lhs, rhs) \
    LOGIC_OP(OP_XOR, dst, lhs, rhs)
#define MR(dst, src)    OR(dst, src, src)

/* Halfword loads.  Primary opcode 40 shifted into the top six bits sets
   bit 31, which does not fit in a signed int, so the word is assembled in
   unsigned arithmetic and only then converted to int. */
#define LHZ(dst, base, offset) \
    ((int) ((40u << 26) | ((unsigned) (dst) << 21) | \
            ((unsigned) (base) << 16) | ((unsigned) (offset) & 0xffffu)))
#define LHZX(dst, base, index) \
    INTEGER_OP(279, dst, base, index)
#define MFCR(dst)       INTEGER_OP(19, dst, 0, 0)
135
/* Rotate-left-then-mask instructions.  The source register is encoded
   ahead of the destination; mbegin..mend give the range of bits kept.
   RLWINM takes the rotate amount as an immediate, RLWNM from a register. */
#define RLWINM(dst, src, shiftimm, mbegin, mend)        \
    ((21 << 26) | ((src) << 21) | ((dst) << 16) |       \
     ((shiftimm) << 11) | ((mbegin) << 6) | ((mend) << 1))
#define RLWNM(dst, src, shiftreg, mbegin, mend)         \
    ((23 << 26) | ((src) << 21) | ((dst) << 16) |       \
     ((shiftreg) << 11) | ((mbegin) << 6) | ((mend) << 1))
142
/* Upper bounds on the code emitted for one filter word: the argument part
   needs at most 4 instructions (PUSHIND is the worst case) and the operator
   part at most 3 (EQ and NEQ).  junk_filter is a throw-away buffer sized
   for one whole item; the sizing pass emits into it just to count. */
#define MAX_INSTR_PER_ARG   4
#define MAX_INSTR_PER_OP    3
#define MAX_INSTR_PER_ITEM  (MAX_INSTR_PER_ARG + MAX_INSTR_PER_OP)
int junk_filter[MAX_INSTR_PER_ITEM];
149
150 enum {NF_LITERAL, NF_HEADER, NF_DATA};
151 struct common { /* Keeps track of values we might want to avoid reloading. */
152 char type; /* NF_LITERAL: immediate; NF_HEADER: header word;
153 NF_DATA: data word. */
154 char nuses; /* Number of remaining uses for this value. */
155 unsigned char reg;
156 /* Register this value is currently in, or NO_REG if none. */
157 unsigned short value;
158 /* Immediate value or header or data offset. */
159 };
160 struct reg { /* Keeps track of the current contents of registers. */
161 unsigned char commoni;
162 /* Index in common[] of the contained value. */
163 #define NOT_COMMON_VALUE NET_MAX_FILTER /* When not a common[] value. */
164 unsigned char stacktimes;
165 /* Number of times register appears in stack. */
166 };
167 struct local { /* Gather local arrays so we could kalloc() if needed. */
168 struct common common[NET_MAX_FILTER]; /* Potentially common values. */
169 struct reg regs[NREGS]; /* Register statuses. */
170 unsigned char commonpos[NET_MAX_FILTER]; /* Index in common[] for the
171 value loaded in each filter
172 command. */
173 unsigned char stackregs[NET_FILTER_STACK_DEPTH];
174 /* Registers making up the
175 stack. */
176 #if USE_EXTRA_REGS
177 unsigned char maxreg;
178 #endif
179 };
180
181 int allocate_register(struct local *s, int commoni);
182 int compile_preamble(int *instructions, struct local *s);
183
184 /* Compile a packet filter into POWERPC machine code. We do everything in
185 the 7 caller-saves registers listed in scratchregs[], except when
186 USE_EXTRA_REGS is defined, in which case we may also allocate callee-
187 saves registers if needed. (Not yet implemented on PPC.)
188
189 Rather than maintaining an explicit stack in memory, we allocate registers
190 dynamically to correspond to stack elements -- we can do this because we
191 know the state of the stack at every point in the filter program. We also
192 attempt to keep around in registers values (immediates, or header or data
193 words) that are used later on, to avoid having to load them again.
194 Since there are only 7 registers being used, we might be forced to reload
195 a value that we could have kept if we had more. We might even be unable
196 to contain the stack in the registers, in which case we return failure and
197 cause the filter to be interpreted by net_do_filter(). But for all current
198 filters I looked at, 7 registers is enough even to avoid reloads. When
199 USE_EXTRA_REGS is defined there are about 28 available registers, which is
200 plenty.
201
202 We depend heavily on NET_MAX_FILTER and NET_FILTER_STACK_DEPTH being
203 small. We keep indexes to arrays sized by them in char-sized fields,
204 originally because we tried allocating these arrays on the stack.
205 Even then we overflowed the small (4K) kernel stack, so we were forced
206 to allocate the arrays dynamically, which is the reason for the existence
207 of `struct local'.
208
209 We also depend on the filter being logically correct, for instance not
210 being longer than NET_MAX_FILTER or underflowing its stack. This is
211 supposed to have been checked by parse_net_filter() before the filter
212 is compiled.
213
214 We are supposed to return 1 (TRUE) if the filter accepts the packet
215 and 0 (FALSE) otherwise. In fact, we may return any non-zero value
216 for true, which is sufficient for our caller and convenient for us.
217
218 There are lots and lots of optimisations that we could do but don't.
219 This is supposedly a *micro*-kernel, after all. Here are some things
220 that could be added without too much headache:
221 - Using the condition register. We go to a lot of trouble to generate
222 integer truth values for EQ etc, but most of the time those values
223 are just ANDed or ORed together or used as arguments to COR etc. So
224 we could compute the comparison values directly into CR bits and
225 operate on them using the CR logical instructions without (most of
226 the time) ever having to generate integer equivalents.
227 - More registers. We could note the last uses of r3, r4, and
228 r5, and convert them to general registers after those uses. But if
229 register shortage turns out to be a problem it is probably best just
230 to define USE_EXTRA_REGS and have done with it.
231 - Minimising range checks. Every time we refer to a word in the data
232 part, we generate code to ensure that it is within bounds. But often
233 the truth of these tests is implied by earlier tests. Instead, at the
234 start of the filter and after every COR or CNAND we could insert
235 a single check when that is necessary. (After CAND and CNOR we don't
236 need to check since if they terminate it will be to return FALSE
237 anyway so all we'd do would be to return it prematurely.)
238 - Remembering immediate values. Instead of generating code as soon as we
239 see a PUSHLIT, we could remember that value and only generate code when
240 it is used. This would enable us to generate certain shorter
241 instructions (like addi) that incorporate the immediate value instead
242 of ever putting it in a register.
243 */
244
245 filter_fct_t
246 net_filter_alloc(filter_t *filter, unsigned int size, unsigned int *lenp)
247 {
248 struct local *s;
249 int len, oldi, i, j, t, ncommon, sp;
250 int type, value, arg, op, reg, reg1, dst, commoni;
251 int returnfalseoffset;
252 int *instructions, *instp, *returnfalse;
253 #if USE_EXTRA_REGS
254 int oldmaxreg;
255 #endif
256 boolean_t compiling;
257
258 #define SCHAR_MAX 127 /* machine/machlimits->h, anyone? */
259 assert(NET_MAX_FILTER <= SCHAR_MAX);
260 assert(NET_FILTER_STACK_DEPTH <= SCHAR_MAX);
261 assert(NREGS <= SCHAR_MAX);
262
263 assert(size < NET_MAX_FILTER);
264
265 s = (struct local *) kalloc(sizeof *s);
266
267 #if USE_EXTRA_REGS
268 s->maxreg = INITIAL_NSCRATCHREGS;
269 #endif
270 len = 0;
271 compiling = FALSE;
272 returnfalse = junk_filter;
273
274 /* This loop runs at least twice, once with compiling==FALSE to determine
275 the length of the instructions we will compile, and once with
276 compiling==TRUE to compile them. The code generated on the two passes
277 must be the same. In the USE_EXTRA_REGS case, the loop can be re-run
278 an extra time while !compiling, if we decide to use the callee-saves
279 registers. This is because we may be able to generate better code with
280 the help of these registers than before. */
281 while (1) {
282
283 /* Identify values that we can potentially preserve in a register to
284 avoid having to reload them. All immediate values and references to
285 known offsets in the header or data are candidates. The results of
286 this loop are the same on every run, so with a bit of work we
287 could run it just once; but this is not a time-critical
288 application. */
289 ncommon = 0;
290 for (i = 0; i < size; i++) {
291 oldi = i;
292 arg = NETF_ARG(filter[i]);
293 if (arg == NETF_PUSHLIT) {
294 type = NF_LITERAL;
295 value = filter[++i];
296 } else if (arg >= NETF_PUSHSTK) {
297 continue;
298 } else if (arg >= NETF_PUSHHDR) {
299 type = NF_HEADER;
300 value = arg - NETF_PUSHHDR;
301 } else if (arg >= NETF_PUSHWORD) {
302 type = NF_DATA;
303 value = arg - NETF_PUSHWORD;
304 } else {
305 continue;
306 }
307 for (j = 0; j < ncommon; j++) {
308 if (s->common[j].type == type && s->common[j].value == value) {
309 s->common[j].nuses++;
310 break;
311 }
312 }
313 if (j == ncommon) {
314 s->common[j].type = type;
315 s->common[j].value = value;
316 s->common[j].nuses = 1;
317 ncommon++;
318 }
319 s->commonpos[oldi] = j;
320 }
321
322 #if USE_EXTRA_REGS
323 oldmaxreg = s->maxreg;
324 #endif
325
326 /* Initially, no registers hold common values or are on the stack. */
327 for (i = 0; i < ncommon; i++)
328 s->common[i].reg = NO_REG;
329 for (i = 0; i < NSCRATCHREGS; i++) {
330 s->regs[scratchregs[i]].commoni = NOT_COMMON_VALUE;
331 s->regs[scratchregs[i]].stacktimes = 0;
332 }
333
334 /* Now read through the filter and generate code. */
335 sp = -1; /* sp points to top element */
336 for (i = 0; i < size; i++) {
337 if (!compiling)
338 instp = junk_filter;
339
340 assert(sp >= -1);
341 assert(sp < NET_FILTER_STACK_DEPTH - 1);
342 commoni = s->commonpos[i];
343 arg = NETF_ARG(filter[i]);
344 op = NETF_OP(filter[i]);
345
346 /* Generate code to get the required value into a register and
347 set `reg' to the number of this register. */
348 switch (arg) {
349 case NETF_PUSHLIT:
350 value = filter[++i];
351 reg = s->common[commoni].reg;
352 if (reg == NO_REG) {
353 if ((reg = allocate_register(s, commoni)) == NO_REG)
354 goto fail;
355 assert(value >= 0); /* Comes from unsigned short. */
356 *instp++ = ORI(reg, REG_ZERO, value);
357 }
358 s->common[commoni].nuses--;
359 break;
360 case NETF_NOPUSH:
361 reg = s->stackregs[sp--];
362 s->regs[reg].stacktimes--;
363 break;
364 case NETF_PUSHZERO:
365 reg = REG_ZERO;
366 break;
367 case NETF_PUSHIND:
368 case NETF_PUSHHDRIND:
369 reg1 = s->stackregs[sp--];
370 s->regs[reg1].stacktimes--;
371 if (arg == NETF_PUSHIND)
372 *instp++ = CMPL(reg1, REG_DATALEN);
373 else
374 *instp++ = CMPLI(reg1,
375 NET_HDW_HDR_MAX/sizeof (unsigned short));
376 *instp = BC(COND_GE, returnfalse - instp);
377 instp++;
378 if ((reg = allocate_register(s, -1)) == NO_REG)
379 goto fail;
380 *instp++ = ADD(reg, reg1, reg1);
381 *instp++ = LHZX(reg, (arg == NETF_PUSHIND) ?
382 REG_DATAADDR : REG_HDRADDR, reg);
383 break;
384 default:
385 if (arg >= NETF_PUSHSTK)
386 reg = s->stackregs[sp - (arg - NETF_PUSHSTK)];
387 else if (arg >= NETF_PUSHWORD) {
388 assert(2 * (NETF_PUSHHDR - NETF_PUSHWORD) <= MAX_LI);
389 assert(NETF_PUSHSTK - NETF_PUSHHDR <= MAX_LI);
390 reg = s->common[commoni].reg;
391 if (reg == NO_REG) {
392 if ((reg = allocate_register(s, commoni)) == NO_REG)
393 goto fail;
394 if (arg < NETF_PUSHHDR) {
395 value = arg - NETF_PUSHWORD;
396 *instp++ = CMPLI(REG_DATALEN, value);
397 *instp = BC(COND_LE, returnfalse - instp);
398 instp++;
399 reg1 = REG_DATAADDR;
400 } else {
401 value = arg - NETF_PUSHHDR;
402 reg1 = REG_HDRADDR;
403 }
404 *instp++ = LHZ(reg, reg1, 2 * value);
405 }
406 s->common[commoni].nuses--;
407 }
408 }
409
410 /* Now generate code to do `op' on `reg1' (lhs) and `reg' (rhs). */
411 if (op != NETF_NOP) {
412 reg1 = s->stackregs[sp--];
413 s->regs[reg1].stacktimes--;
414 }
415 switch (op) {
416 case NETF_OP(NETF_CAND):
417 case NETF_OP(NETF_COR):
418 case NETF_OP(NETF_CNAND):
419 case NETF_OP(NETF_CNOR):
420 dst = -1;
421 case NETF_OP(NETF_NOP):
422 break;
423 default:
424 /* Allocate a register to put the result in. */
425 if ((dst = allocate_register(s, -1)) == NO_REG)
426 goto fail;
427 }
428 switch (op) {
429 case NETF_OP(NETF_NOP):
430 dst = reg;
431 break;
432 case NETF_OP(NETF_EQ):
433 case NETF_OP(NETF_NEQ):
434 /* We arrange for the truth value to end up in the carry
435 flag and then put it in the destination register by
436 adding-with-carry zero to itself. To set the carry, we
437 first make a value `x' that is zero if the values are
438 equal; this is either their XOR, or, if we know the
439 rhs is 0, the lhs. Then to set the carry only when
440 x==0 we do `subfic dst,x,0' (subtract x from 0, setting
441 carry as not-borrow, so set only if x==0); to set it when
442 x!=0 we do `addic dst,x,-1' (add -1 to x setting carry,
443 so set unless x==0). We're only interested in the carry
444 from these operations, not dst.
445 We don't test if reg1==REG_ZERO since in practice you
446 write NETF_PUSHLIT|NETF_EQ; the other order is eccentric
447 so you get an extra instruction, tough. */
448 if (reg == REG_ZERO)
449 t = reg1;
450 else {
451 *instp++ = XOR(dst, reg1, reg);
452 t = dst;
453 }
454 *instp++ = (op == NETF_OP(NETF_EQ)) ?
455 SUBFIC(dst, t, 0) : ADDIC(dst, t, -1);
456 *instp++ = ADDE(dst, REG_ZERO, REG_ZERO);
457 break;
458 case NETF_OP(NETF_LT):
459 /* LT and GT take advantage of the fact that all numbers are
460 16-bit quantities, so the sign bit after a subtraction
461 is a reliable indication of the relative magnitudes of
462 the operands. */
463 *instp++ = SUBF(dst, reg, reg1); /* dst = reg1 - reg */
464 *instp++ = RLWINM(dst, dst, 1, 31, 31); /* sign bit */
465 break;
466 case NETF_OP(NETF_GT):
467 *instp++ = SUBF(dst, reg1, reg); /* dst = reg - reg1 */
468 *instp++ = RLWINM(dst, dst, 1, 31, 31); /* sign bit */
469 break;
470 case NETF_OP(NETF_LE):
471 /* LE and GE use the carry (= not-borrow) flag. When doing
472 a - b, there is a borrow if b > a, so carry if b <= a. */
473 *instp++ = SUBFC(dst, reg1, reg); /* dst = reg - reg1 */
474 *instp++ = ADDE(dst, REG_ZERO, REG_ZERO);/* ca if reg1 <= reg */
475 break;
476 case NETF_OP(NETF_GE):
477 *instp++ = SUBFC(dst, reg, reg1); /* dst = reg1 - reg */
478 *instp++ = ADDE(dst, REG_ZERO, REG_ZERO);/* ca if reg <= reg1 */
479 break;
480 case NETF_OP(NETF_AND):
481 j = OP_AND;
482 goto logical;
483 case NETF_OP(NETF_OR):
484 j = OP_OR;
485 goto logical;
486 case NETF_OP(NETF_XOR):
487 j = OP_XOR;
488 goto logical;
489 case NETF_OP(NETF_RSH):
490 j = OP_SRW;
491 logical:
492 *instp++ = LOGIC_OP(j, dst, reg1, reg);
493 break;
494 case NETF_OP(NETF_ADD):
495 j = OP_ADD;
496 goto arithmetical;
497 case NETF_OP(NETF_SUB):
498 j = OP_SUBF; /* First reg subtracted from second. */
499 arithmetical:
500 *instp++ = ARITH_OP(j, dst, reg, reg1);
501 *instp++ = ANDI(dst, dst, 0xffff);
502 break;
503 case NETF_OP(NETF_LSH):
504 *instp++ = RLWNM(dst, reg1, reg, 16, 31);
505 break;
506 case NETF_OP(NETF_COR):
507 case NETF_OP(NETF_CNAND):
508 *instp++ = CMPL(reg1, reg);
509 *instp++ = BCLR((op == NETF_OP(NETF_COR)) ? COND_EQ : COND_NE);
510 break;
511 case NETF_OP(NETF_CAND):
512 case NETF_OP(NETF_CNOR):
513 *instp++ = CMPL(reg1, reg);
514 *instp = BC((op == NETF_OP(NETF_CAND)) ? COND_NE : COND_EQ,
515 returnfalse - instp);
516 instp++;
517 break;
518 default:
519 printf("op == 0x%x\n", op);
520 panic("net_filter_alloc: bad op");
521 /* Should have been caught by parse_net_filter(). */
522 }
523 /* If the op generated a result, push it on the stack. */
524 if (dst >= 0) {
525 s->stackregs[++sp] = dst;
526 s->regs[dst].stacktimes++;
527 }
528 if (!compiling) {
529 assert(instp - junk_filter <= MAX_INSTR_PER_ITEM);
530 len += instp - junk_filter;
531 }
532 }
533 if (compiling) {
534 /* If the stack contains any values, we are supposed to return 0 or
535 1 according as the top-of-stack is zero or not. Since the only
536 place we are called requires just zero-false/nonzero-true, we
537 simply copy the value into r3. If the stack is empty, we
538 leave the pointer value r3 intact to return TRUE. */
539 if (sp >= 0)
540 *instp++ = MR(REG_RET, s->stackregs[sp]);
541 *instp++ = BLR();
542 /* Branch here to return false. We could avoid adding these
543 instructions if they are not used, but practically every
544 filter does use them (for failure values when trying to
545 access values beyond the header or data length) so it's
546 not worth the effort. */
547 assert(instp == returnfalse);
548 *instp++ = LI(REG_RET, 0);
549 *instp++ = BLR();
550 break;
551 } else {
552 len += 1 + (sp >= 0);
553 /* For the reach-the-end return instruction(s). */
554 #if USE_EXTRA_REGS
555 if (s->maxreg > oldmaxreg) {
556 len = 0;
557 continue;
558 }
559 #endif
560 len += compile_preamble(NULL, s);
561 returnfalseoffset = len;
562 len += 2; /* For the return-false instructions. */
563 }
564 if ((instructions = (int *) kalloc(len * sizeof (int))) == NULL)
565 return NULL;
566 returnfalse = instructions + returnfalseoffset;
567 instp = instructions;
568 instp += compile_preamble(instp, s);
569 compiling = TRUE;
570 }
571
572 assert(instp - instructions == len);
573 *lenp = len * sizeof (int);
574 {
575 kern_return_t kr;
576 vm_machine_attribute_val_t val = MATTR_VAL_CACHE_SYNC;
577
578 kr = pmap_attribute(kernel_pmap, (vm_offset_t) instructions,
579 len * sizeof (int), MATTR_CACHE, &val);
580 if (kr != KERN_SUCCESS) {
581 printf("net_filter_alloc: pmap_attribute -> 0x%x\n", kr);
582 return NULL;
583 }
584 }
585 kfree((vm_offset_t) s, sizeof *s);
586 return (filter_fct_t) instructions;
587 fail:
588 assert(!compiling);
589 kfree((vm_offset_t) s, sizeof *s);
590 printf("net_filter_alloc: failed to compile (filter too complex)\n");
591 printf("-- will work, but more slowly; consider enabling USE_EXTRA_REGS\n");
592 return NULL;
593 }
594
595
596 /* Allocate a register. Registers that are already being used to make up
597 the virtual stack are ineligible. Among the others, we choose the one
598 whose value has the least number of subsequent uses (ideally, and
599 usually, 0) of the common value it already holds. If commoni is >=
600 0, it is the index in common[] of the value we are going to put in
601 the allocated register, so we can update the various data structures
602 appropriately. */
603 int
604 allocate_register(struct local *s, int commoni)
605 {
606 int i, reg, bestreg, nuses, bestregnuses, maxreg;
607
608 bestreg = NO_REG;
609 #if USE_EXTRA_REGS
610 maxreg = s->maxreg;
611 #else
612 maxreg = NSCRATCHREGS;
613 #endif
614 while (1) {
615 bestregnuses = NOT_COMMON_VALUE;
616 for (i = 0; i < maxreg; i++) {
617 reg = scratchregs[i];
618 if (s->regs[reg].stacktimes == 0) {
619 nuses = (s->regs[reg].commoni == NOT_COMMON_VALUE) ?
620 0 : s->common[s->regs[reg].commoni].nuses;
621 if (nuses < bestregnuses) {
622 bestreg = reg;
623 bestregnuses = nuses;
624 }
625 }
626 }
627 if (bestreg != NO_REG)
628 break;
629 #if USE_EXTRA_REGS
630 if (maxreg == NSCRATCHREGS)
631 return NO_REG;
632 s->maxreg = ++maxreg;
633 #else
634 return NO_REG;
635 #endif
636 }
637 if (bestregnuses > 0)
638 printf("net_filter_alloc: forced to reallocate r%d\n", bestreg);
639 /* With USE_EXTRA_REGS, we could push up the number of registers
640 here to have one extra available for common values, but it's usually
641 not worth the overhead of the extra save-and-restore in the preamble.
642 Anyway, this never happens with typical filters. */
643 if (s->regs[bestreg].commoni != NOT_COMMON_VALUE)
644 s->common[s->regs[bestreg].commoni].reg = NO_REG;
645 if (commoni >= 0) {
646 s->regs[bestreg].commoni = commoni;
647 s->common[commoni].reg = bestreg;
648 } else
649 s->regs[bestreg].commoni = NOT_COMMON_VALUE;
650 return bestreg;
651 }
652
653
654 #define FIXED_PREAMBLE_INSTRUCTIONS 1
655
656 int
657 compile_preamble(int *instructions, struct local *s)
658 {
659 int *instp;
660 int len = FIXED_PREAMBLE_INSTRUCTIONS;
661 #if USE_EXTRA_REGS
662 #error this hp code must be ported to the ppc
663 int extra_regs, i, j, t, disp;
664
665 extra_regs = s->maxreg - INITIAL_NSCRATCHREGS;
666 if (extra_regs > 0) {
667 len = extra_regs * 2 + 4;
668 /* stw rp | (n-1) * stw | bl | stw | ldw rp | (n-1) * ldw | bv | ldw */
669 } else
670 return 0;
671 #endif
672 if (instructions == NULL)
673 return len;
674 instp = instructions;
675
676 *instp++ = LI(REG_ZERO, 0);
677 assert(instp - instructions == FIXED_PREAMBLE_INSTRUCTIONS);
678
679 #if USE_EXTRA_REGS
680 #error this hp code must be ported to the ppc
681 /* Generate a wrapper function to save the callee-saves registers
682 before invoking the filter code we have generated. It would be
683 marginally better to have the filter branch directly to the
684 postamble code on return, but the difference is trivial and it
685 is easier to have it always branch to (rp). */
686 #define FRAME_SIZE 128 /* This is plenty without being excessive. */
687 *instp++ = STW_NEG(REG_RTN, 20, REG_SP); /* stw rp,-20(sp) */
688 i = INITIAL_NSCRATCHREGS;
689 t = STWM(scratchregs[i], FRAME_SIZE, REG_SP); /* stwm r3,128(sp) */
690 j = FRAME_SIZE;
691 while (++i < s->maxreg) {
692 *instp++ = t;
693 j -= sizeof (int);
694 t = STW_NEG(scratchregs[i], j, REG_SP); /* stw r4,-124(sp) &c */
695 }
696 disp = extra_regs + 2; /* n * ldw | bv | ldw rp */
697 *instp++ = BL(disp, REG_RTN); /* bl filter,rp */
698 *instp++ = t; /* stw in delay slot */
699 *instp++ = LDW_NEG(FRAME_SIZE + 20, REG_SP, REG_RTN);
700 /* ldw -148(sp),rp */
701 while (--i > INITIAL_NSCRATCHREGS) {
702 *instp++ = LDW_NEG(j, REG_SP, scratchregs[i]); /* ldw -124(sp),r4 &c */
703 j += sizeof (int);
704 }
705 *instp++ = BV(0, REG_RTN); /* bv (rp) */
706 *instp++ = LDWM_NEG(FRAME_SIZE, REG_SP, scratchregs[i]);
707 /* ldwm -128(sp),r3
708 in delay slot */
709 #endif
710
711 assert(instp - instructions == len);
712 return len;
713 }
714
715 void
716 net_filter_free(filter_fct_t fp, unsigned int len)
717 {
718 kfree((vm_offset_t) fp, len);
719 }
720
721 #else /* NET_FILTER_COMPILER */
722
723 /*
724 * Compilation of a source network filter into ppc instructions
725 * - a small version that doesn't do anything, but doesn't take
726 * up any space either. Note that if using a single mklinux server
727 * with ethertalk enabled (standard situation), the filter passes
728 * everything through so no need to compile one. If running multi
729 * servers then there is more of a need. Ethertalk (in linux server)
730 * should really have a packet filter, but at time of writing
731 * it does not.
732 */
733 filter_fct_t
734 net_filter_alloc(
735 filter_t *fpstart,
736 unsigned int fplen,
737 unsigned int *len)
738 {
739 *len = 0;
740 return ((filter_fct_t)0);
741 }
742
743 void
744 net_filter_free(
745 filter_fct_t fp,
746 unsigned int len)
747 {
748 assert(fp == (filter_fct_t)0 && len == 0);
749 }
750 #endif /* NET_FILTER_COMPILER */