]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/bridge.c
xnu-344.tar.gz
[apple/xnu.git] / bsd / net / bridge.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * Copyright (c) 1998 Luigi Rizzo
24 *
25 * Redistribution and use in source and binary forms, with or without
26 * modification, are permitted provided that the following conditions
27 * are met:
28 * 1. Redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer.
30 * 2. Redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
45 *
9bccf70c 46 * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $
1c79356b
A
47 */
48
49/*
50 * This code implements bridging in FreeBSD. It only acts on ethernet
51 * type of interfaces (others are still usable for routing).
52 * A bridging table holds the source MAC address/dest. interface for each
53 * known node. The table is indexed using a hash of the source address.
54 *
9bccf70c
A
55 * Input packets are tapped near the beginning of ether_input(), and
56 * analysed by calling bridge_in(). Depending on the result, the packet
1c79356b
A
57 * can be forwarded to one or more output interfaces using bdg_forward(),
58 * and/or sent to the upper layer (e.g. in case of multicast).
59 *
60 * Output packets are intercepted near the end of ether_output(),
9bccf70c 61 * the correct destination is selected calling bridge_dst_lookup(),
1c79356b
A
62 * and then forwarding is done using bdg_forward().
63 * Bridging is controlled by the sysctl variable net.link.ether.bridge
64 *
65 * The arp code is also modified to let a machine answer to requests
66 * irrespective of the port the request came from.
67 *
68 * In case of loops in the bridging topology, the bridge detects this
69 * event and temporarily mutes output bridging on one of the ports.
9bccf70c
A
70 * Periodically, interfaces are unmuted by bdg_timeout().
71 * Muting is only implemented as a safety measure, and also as
1c79356b
A
72 * a mechanism to support a user-space implementation of the spanning
73 * tree algorithm. In the final release, unmuting will only occur
74 * because of explicit action of the user-level daemon.
75 *
76 * To build a bridging kernel, use the following option
77 * option BRIDGE
78 * and then at runtime set the sysctl variable to enable bridging.
79 *
80 * Only one interface is supposed to have addresses set (but
81 * there are no problems in practice if you set addresses for more
82 * than one interface).
83 * Bridging will act before routing, but nothing prevents a machine
84 * from doing both (modulo bugs in the implementation...).
85 *
86 * THINGS TO REMEMBER
1c79356b
A
87 * - bridging is incompatible with multicast routing on the same
88 * machine. There is not an easy fix to this.
89 * - loop detection is still not very robust.
90 * - the interface of bdg_forward() could be improved.
91 */
92
93#include <sys/param.h>
94#include <sys/mbuf.h>
95#include <sys/malloc.h>
96#include <sys/systm.h>
97#include <sys/socket.h> /* for net/if.h */
98#include <sys/kernel.h>
99#include <sys/sysctl.h>
100
101#include <net/if.h>
102#include <net/if_types.h>
103
104#include <netinet/in.h> /* for struct arpcom */
105#include <netinet/in_systm.h>
106#include <netinet/in_var.h>
107#include <netinet/ip.h>
108#include <netinet/if_ether.h> /* for struct arpcom */
109
110#include "opt_ipfw.h"
111#include "opt_ipdn.h"
112
9bccf70c 113#if defined(IPFIREWALL)
1c79356b
A
114#include <net/route.h>
115#include <netinet/ip_fw.h>
9bccf70c 116#if defined(DUMMYNET)
1c79356b
A
117#include <netinet/ip_dummynet.h>
118#endif
9bccf70c 119#endif
1c79356b
A
120
121#include <net/bridge.h>
122
123/*
124 * For debugging, you can use the following macros.
125 * remember, rdtsc() only works on Pentium-class machines
126
127 quad_t ticks;
128 DDB(ticks = rdtsc();)
129 ... interesting code ...
130 DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;)
131
132 *
133 */
134
135#define DDB(x) x
136#define DEB(x)
137
1c79356b 138static void bdginit(void *);
9bccf70c 139static void bdgtakeifaces(void);
1c79356b 140static void flush_table(void);
9bccf70c
A
141static void bdg_promisc_on(void);
142static void parse_bdg_cfg(void);
1c79356b
A
143
144static int bdg_ipfw = 0 ;
145int do_bridge = 0;
146bdg_hash_table *bdg_table = NULL ;
147
148/*
9bccf70c
A
149 * System initialization
150 */
151
152SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL)
153
154static struct bdg_stats bdg_stats ;
155struct bdg_softc *ifp2sc = NULL ;
156/* XXX make it static of size BDG_MAX_PORTS */
157
/*
 * Sanity check on a softc slot: execute statement(s) x when the entry of
 * ifp2sc[] selected by ifp->if_index does not carry the 0xDEADBEEF marker
 * (the value bdgtakeifaces() stores in .magic for interfaces it registers).
 *
 * The body is wrapped in do { } while (0) so that "IFP_CHK(a, b);" is
 * exactly one statement and can be used safely as the body of an if/else;
 * the ifp argument is parenthesized so that any pointer expression works.
 */
#define IFP_CHK(ifp, x) \
	do { \
		if (ifp2sc[(ifp)->if_index].magic != 0xDEADBEEF) { x ; } \
	} while (0)
160
161/*
162 * turn off promisc mode, optionally clear the IFF_USED flag.
163 * The flag is turned on by parse_bdg_cfg().
1c79356b 164 */
9bccf70c
A
165static void
166bdg_promisc_off(int clear_used)
167{
1c79356b 168 struct ifnet *ifp ;
9bccf70c
A
169 TAILQ_FOREACH(ifp, &ifnet, if_link) {
170 if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
171 int s, ret ;
172 s = splimp();
173 ret = ifpromisc(ifp, 0);
174 splx(s);
175 ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ;
176 DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n",
177 ifp->if_name, ifp->if_unit,
178 ifp->if_flags, ifp2sc[ifp->if_index].flags);)
179 }
180 if (clear_used) {
181 ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ;
182 bdg_stats.s[ifp->if_index].name[0] = '\0';
183 }
184 }
185}
186
187/*
188 * set promisc mode on the interfaces we use.
189 */
190static void
191bdg_promisc_on()
192{
193 struct ifnet *ifp ;
194 int s ;
195
196 TAILQ_FOREACH(ifp, &ifnet, if_link) {
197 if ( !BDG_USED(ifp) )
198 continue ;
199 if ( 0 == ( ifp->if_flags & IFF_UP) ) {
200 s = splimp();
201 if_up(ifp);
202 splx(s);
203 }
204 if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
205 int ret ;
206 s = splimp();
207 ret = ifpromisc(ifp, 1);
208 splx(s);
209 ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ;
210 printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n",
211 ifp->if_name, ifp->if_unit,
212 ifp->if_flags, ifp2sc[ifp->if_index].flags);
213 }
214 if (BDG_MUTED(ifp)) {
215 printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);
216 BDG_UNMUTE(ifp) ;
217 }
218 }
219}
1c79356b
A
220
221static int
9bccf70c 222sysctl_bdg(SYSCTL_HANDLER_ARGS)
1c79356b
A
223{
224 int error, oldval = do_bridge ;
225
226 error = sysctl_handle_int(oidp,
227 oidp->oid_arg1, oidp->oid_arg2, req);
9bccf70c 228 DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
1c79356b 229 oidp->oid_name, oidp->oid_arg2,
9bccf70c
A
230 oldval, do_bridge); )
231
1c79356b
A
232 if (bdg_table == NULL)
233 do_bridge = 0 ;
234 if (oldval != do_bridge) {
9bccf70c 235 bdg_promisc_off( 1 ); /* reset previously used interfaces */
1c79356b 236 flush_table();
9bccf70c
A
237 if (do_bridge) {
238 parse_bdg_cfg();
239 bdg_promisc_on();
240 }
1c79356b
A
241 }
242 return error ;
243}
244
9bccf70c
A
245static char bridge_cfg[256] = { "" } ;
246
247/*
248 * parse the config string, set IFF_USED, name and cluster_id
249 * for all interfaces found.
250 */
251static void
252parse_bdg_cfg()
253{
254 char *p, *beg ;
255 int i, l, cluster;
256 struct bdg_softc *b;
257
258 for (p= bridge_cfg; *p ; p++) {
259 /* interface names begin with [a-z] and continue up to ':' */
260 if (*p < 'a' || *p > 'z')
261 continue ;
262 for ( beg = p ; *p && *p != ':' ; p++ )
263 ;
264 if (*p == 0) /* end of string, ':' not found */
265 return ;
266 l = p - beg ; /* length of name string */
267 p++ ;
268 DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);)
269 for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++)
270 cluster = cluster*10 + (*p -'0');
271 /*
272 * now search in bridge strings
273 */
274 for (i=0, b = ifp2sc ; i < if_index ; i++, b++) {
275 char buf[32];
276 struct ifnet *ifp = b->ifp ;
277
278 if (ifp == NULL)
279 continue;
280 sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit);
281 if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */
282 b->cluster_id = htons(cluster) ;
283 b->flags |= IFF_USED ;
284 sprintf(bdg_stats.s[ifp->if_index].name,
285 "%s%d:%d", ifp->if_name, ifp->if_unit, cluster);
286
287 DEB(printf("--++ found %s\n",
288 bdg_stats.s[ifp->if_index].name);)
289 break ;
290 }
291 }
292 if (*p == '\0')
293 break ;
294 }
295}
296
297static int
298sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS)
299{
300 int error = 0 ;
301 char oldval[256] ;
302
303 strcpy(oldval, bridge_cfg) ;
304
305 error = sysctl_handle_string(oidp,
306 bridge_cfg, oidp->oid_arg2, req);
307 DEB(
308 printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n",
309 oidp->oid_name, oidp->oid_arg2,
310 error,
311 oldval, bridge_cfg);
312 )
313 if (strcmp(oldval, bridge_cfg)) {
314 bdg_promisc_off( 1 ); /* reset previously-used interfaces */
315 flush_table();
316 parse_bdg_cfg(); /* and set new ones... */
317 if (do_bridge)
318 bdg_promisc_on(); /* re-enable interfaces */
319 }
320 return error ;
321}
322
323static int
324sysctl_refresh(SYSCTL_HANDLER_ARGS)
325{
326 if (req->newptr)
327 bdgtakeifaces();
328
329 return 0;
330}
331
332
1c79356b 333SYSCTL_DECL(_net_link_ether);
9bccf70c
A
334SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW,
335 &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
336 "Bridge configuration");
337
1c79356b 338SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
9bccf70c
A
339 &do_bridge, 0, &sysctl_bdg, "I", "Bridging");
340
341SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
342 &bdg_ipfw,0,"Pass bridged pkts through firewall");
343
344#define SY(parent, var, comment) \
345 static int var ; \
346 SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment);
347
348int bdg_ipfw_drops;
349SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop,
350 CTLFLAG_RW, &bdg_ipfw_drops,0,"");
351
352int bdg_ipfw_colls;
353SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions,
354 CTLFLAG_RW, &bdg_ipfw_colls,0,"");
355
356SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR,
357 NULL, 0, &sysctl_refresh, "I", "iface refresh");
1c79356b 358
1c79356b 359#if 1 /* diagnostic vars */
9bccf70c
A
360
361SY(_net_link_ether, verbose, "Be verbose");
362SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward");
363
364SY(_net_link_ether, bdg_thru, "Packets through bridge");
365
366SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward");
367
368SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward");
369SY(_net_link_ether, bdg_predict, "Correctly predicted header location");
370
371SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg");
372SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item");
373SY(_net_link_ether, bdg_fw_count, "Cycle counter count");
1c79356b 374#endif
9bccf70c 375
1c79356b
A
376SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
377 CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");
378
379static int bdg_loops ;
380
381/*
382 * completely flush the bridge table.
383 */
384static void
385flush_table()
386{
387 int s,i;
388
389 if (bdg_table == NULL)
390 return ;
391 s = splimp();
392 for (i=0; i< HASH_SIZE; i++)
393 bdg_table[i].name= NULL; /* clear table */
394 splx(s);
395}
396
397/* wrapper for funnel */
398void
399bdg_timeout_funneled(void * dummy)
400{
401 boolean_t funnel_state;
402
403 funnel_state = thread_funnel_set(network_flock, TRUE);
404 bdg_timeout(dummy);
405 funnel_state = thread_funnel_set(network_flock, FALSE);
406}
407
408/*
409 * called periodically to flush entries etc.
410 */
411static void
412bdg_timeout(void *dummy)
413{
1c79356b 414 static int slowtimer = 0 ;
9bccf70c 415
1c79356b
A
416 if (do_bridge) {
417 static int age_index = 0 ; /* index of table position to age */
418 int l = age_index + HASH_SIZE/4 ;
419 /*
420 * age entries in the forwarding table.
421 */
422 if (l > HASH_SIZE)
423 l = HASH_SIZE ;
424 for (; age_index < l ; age_index++)
425 if (bdg_table[age_index].used)
426 bdg_table[age_index].used = 0 ;
427 else if (bdg_table[age_index].name) {
428 /* printf("xx flushing stale entry %d\n", age_index); */
429 bdg_table[age_index].name = NULL ;
430 }
431 if (age_index >= HASH_SIZE)
432 age_index = 0 ;
433
434 if (--slowtimer <= 0 ) {
435 slowtimer = 5 ;
436
9bccf70c 437 bdg_promisc_on() ; /* we just need unmute, really */
1c79356b
A
438 bdg_loops = 0 ;
439 }
440 }
441 timeout(bdg_timeout_funneled, (void *)0, 2*hz );
1c79356b
A
442}
443
444/*
445 * local MAC addresses are held in a small array. This makes comparisons
446 * much faster.
447 */
9bccf70c 448bdg_addr bdg_addresses[BDG_MAX_PORTS];
1c79356b
A
449int bdg_ports ;
450
451/*
9bccf70c
A
452 * initialization of bridge code. This needs to be done after all
453 * interfaces have been configured.
1c79356b
A
454 */
455static void
9bccf70c 456bdginit(void *dummy)
1c79356b 457{
9bccf70c 458
1c79356b
A
459 if (bdg_table == NULL)
460 bdg_table = (struct hash_table *)
461 _MALLOC(HASH_SIZE * sizeof(struct hash_table),
462 M_IFADDR, M_WAITOK);
463 flush_table();
464
9bccf70c
A
465 ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc),
466 M_IFADDR, M_WAITOK );
467 bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) );
1c79356b
A
468
469 bzero(&bdg_stats, sizeof(bdg_stats) );
9bccf70c
A
470 bdgtakeifaces();
471 bdg_timeout(0);
472 do_bridge=0;
473}
474
475void
476bdgtakeifaces(void)
477{
478 int i ;
479 struct ifnet *ifp;
480 struct arpcom *ac ;
481 bdg_addr *p = bdg_addresses ;
482 struct bdg_softc *bp;
483
1c79356b 484 bdg_ports = 0 ;
9bccf70c 485 *bridge_cfg = '\0';
1c79356b 486
9bccf70c 487 printf("BRIDGE 010131, have %d interfaces\n", if_index);
1c79356b 488 for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ;
9bccf70c 489 i++, ifp = TAILQ_NEXT(ifp, if_link) )
1c79356b 490 if (ifp->if_type == IFT_ETHER) { /* ethernet ? */
9bccf70c 491 bp = &ifp2sc[ifp->if_index] ;
1c79356b 492 ac = (struct arpcom *)ifp;
9bccf70c
A
493 sprintf(bridge_cfg + strlen(bridge_cfg),
494 "%s%d:1,", ifp->if_name, ifp->if_unit);
495 printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
496 ifp->if_index,
497 bdg_stats.s[ifp->if_index].name,
498 (int)ifp->if_type, (int) ifp->if_physical,
499 (int)ifp->if_addrlen,
500 ac->ac_enaddr, "." );
501 bcopy(ac->ac_enaddr, p->etheraddr, 6);
502 p++ ;
503 bp->ifp = ifp ;
504 bp->flags = IFF_USED ;
505 bp->cluster_id = htons(1) ;
506 bp->magic = 0xDEADBEEF ;
507
508 sprintf(bdg_stats.s[ifp->if_index].name,
509 "%s%d:%d", ifp->if_name, ifp->if_unit,
510 ntohs(bp->cluster_id));
511 bdg_ports ++ ;
512 }
513
1c79356b
A
514}
515
516/*
517 * bridge_in() is invoked to perform bridging decision on input packets.
9bccf70c 518 *
1c79356b 519 * On Input:
9bccf70c 520 * eh Ethernet header of the incoming packet.
1c79356b
A
521 *
522 * On Return: destination of packet, one of
523 * BDG_BCAST broadcast
524 * BDG_MCAST multicast
525 * BDG_LOCAL is only for a local address (do not forward)
526 * BDG_DROP drop the packet
527 * ifp ifp of the destination interface.
528 *
529 * Forwarding is not done directly to give a chance to some drivers
530 * to fetch more of the packet, or simply drop it completely.
531 */
532
1c79356b 533struct ifnet *
9bccf70c 534bridge_in(struct ifnet *ifp, struct ether_header *eh)
1c79356b
A
535{
536 int index;
9bccf70c
A
537 struct ifnet *dst , *old ;
538 int dropit = BDG_MUTED(ifp) ;
1c79356b
A
539
540 /*
541 * hash the source address
542 */
543 index= HASH_FN(eh->ether_shost);
544 bdg_table[index].used = 1 ;
545 old = bdg_table[index].name ;
546 if ( old ) { /* the entry is valid. */
9bccf70c
A
547 IFP_CHK(old, printf("bridge_in-- reading table\n") );
548
1c79356b 549 if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) {
9bccf70c 550 bdg_ipfw_colls++ ;
1c79356b
A
551 bdg_table[index].name = NULL ;
552 } else if (old != ifp) {
553 /*
554 * found a loop. Either a machine has moved, or there
555 * is a misconfiguration/reconfiguration of the network.
556 * First, do not forward this packet!
557 * Record the relocation anyways; then, if loops persist,
558 * suspect a reconfiguration and disable forwarding
559 * from the old interface.
560 */
561 bdg_table[index].name = ifp ; /* relocate address */
562 printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
563 bdg_loops, eh->ether_shost, ".",
564 ifp->if_name, ifp->if_unit,
565 old->if_name, old->if_unit,
9bccf70c 566 BDG_MUTED(old) ? "muted":"active");
1c79356b 567 dropit = 1 ;
9bccf70c 568 if ( !BDG_MUTED(old) ) {
1c79356b 569 if (++bdg_loops > 10)
9bccf70c 570 BDG_MUTE(old) ;
1c79356b
A
571 }
572 }
573 }
574
575 /*
576 * now write the source address into the table
577 */
578 if (bdg_table[index].name == NULL) {
579 DEB(printf("new addr %6D at %d for %s%d\n",
580 eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
581 bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6);
582 bdg_table[index].name = ifp ;
583 }
9bccf70c 584 dst = bridge_dst_lookup(eh);
1c79356b
A
585 /* Return values:
586 * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
587 * For muted interfaces, the first 3 are changed in BDG_LOCAL,
588 * and others to BDG_DROP. Also, for incoming packets, ifp is changed
589 * to BDG_DROP in case ifp == src . These mods are not necessary
590 * for outgoing packets from ether_output().
591 */
592 BDG_STAT(ifp, BDG_IN);
593 switch ((int)dst) {
594 case (int)BDG_BCAST:
595 case (int)BDG_MCAST:
596 case (int)BDG_LOCAL:
597 case (int)BDG_UNKNOWN:
598 case (int)BDG_DROP:
599 BDG_STAT(ifp, dst);
600 break ;
601 default :
602 if (dst == ifp || dropit )
603 BDG_STAT(ifp, BDG_DROP);
604 else
605 BDG_STAT(ifp, BDG_FORWARD);
606 break ;
607 }
608
609 if ( dropit ) {
610 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
611 return BDG_LOCAL ;
612 else
613 return BDG_DROP ;
614 } else {
615 return (dst == ifp ? BDG_DROP : dst ) ;
616 }
617}
618
619/*
9bccf70c
A
620 * Forward to dst, excluding src port and muted interfaces.
621 * If src == NULL, the pkt comes from ether_output, and dst is the real
622 * interface the packet is originally sent to. In this case we must forward
623 * it to the whole cluster. We never call bdg_forward from ether_output on
624 * interfaces which are not part of a cluster.
625 *
626 * The packet is freed if possible (i.e. surely not of interest for
627 * the upper layer), otherwise a copy is left for use by the caller
628 * (pointer in m0).
629 *
630 * It would be more efficient to make bdg_forward() always consume
631 * the packet, leaving to the caller the task to check if it needs a copy
632 * and get one in case. As it is now, bdg_forward() can sometimes make
633 * a copy whereas it is not necessary.
634 *
635 * XXX be careful about eh, it can be a pointer into *m
1c79356b 636 */
9bccf70c
A
637struct mbuf *
638bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst)
1c79356b 639{
9bccf70c
A
640 struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */
641 struct ifnet *ifp, *last = NULL ;
642 int s ;
643 int shared = bdg_copy ; /* someone else is using the mbuf */
644 int once = 0; /* loop only once */
645 struct ifnet *real_dst = dst ; /* real dst from ether_output */
646#ifdef IPFIREWALL
647 struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */
648#endif
1c79356b 649
9bccf70c
A
650 /*
651 * XXX eh is usually a pointer within the mbuf (some ethernet drivers
652 * do that), so we better copy it before doing anything with the mbuf,
653 * or we might corrupt the header.
654 */
655 struct ether_header save_eh = *eh ;
656
657#if defined(IPFIREWALL) && defined(DUMMYNET)
658 if (m0->m_type == MT_DUMMYNET) {
659 /* extract info from dummynet header */
660 rule = (struct ip_fw_chain *)(m0->m_data) ;
661 m0 = m0->m_next ;
662 src = m0->m_pkthdr.rcvif;
663 shared = 0 ; /* For sure this is our own mbuf. */
664 } else
665#endif
666 bdg_thru++; /* only count once */
1c79356b 667
9bccf70c
A
668 if (src == NULL) /* packet from ether_output */
669 dst = bridge_dst_lookup(eh);
1c79356b 670 if (dst == BDG_DROP) { /* this should not happen */
9bccf70c
A
671 printf("xx bdg_forward for BDG_DROP\n");
672 m_freem(m0);
673 return NULL;
1c79356b
A
674 }
675 if (dst == BDG_LOCAL) { /* this should not happen as well */
676 printf("xx ouch, bdg_forward for local pkt\n");
9bccf70c 677 return m0;
1c79356b
A
678 }
679 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
9bccf70c 680 ifp = ifnet.tqh_first ; /* scan all ports */
1c79356b 681 once = 0 ;
9bccf70c
A
682 if (dst != BDG_UNKNOWN) /* need a copy for the local stack */
683 shared = 1 ;
1c79356b
A
684 } else {
685 ifp = dst ;
9bccf70c 686 once = 1 ;
1c79356b 687 }
9bccf70c
A
688 if ( (u_int)(ifp) <= (u_int)BDG_FORWARD )
689 panic("bdg_forward: bad dst");
690
691#ifdef IPFIREWALL
1c79356b 692 /*
9bccf70c
A
693 * Do filtering in a very similar way to what is done in ip_output.
694 * Only if firewall is loaded, enabled, and the packet is not
695 * from ether_output() (src==NULL, or we would filter it twice).
696 * Additional restrictions may apply e.g. non-IP, short packets,
697 * and pkts already gone through a pipe.
1c79356b 698 */
9bccf70c
A
699 if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) {
700 struct ip *ip ;
701 int i;
702
703 if (rule != NULL) /* dummynet packet, already partially processed */
704 goto forward; /* HACK! I should obey the fw_one_pass */
705 if (ntohs(save_eh.ether_type) != ETHERTYPE_IP)
706 goto forward ; /* not an IP packet, ipfw is not appropriate */
707 if (m0->m_pkthdr.len < sizeof(struct ip) )
708 goto forward ; /* header too short for an IP pkt, cannot filter */
709 /*
710 * i need some amt of data to be contiguous, and in case others need
711 * the packet (shared==1) also better be in the first mbuf.
712 */
713 i = min(m0->m_pkthdr.len, max_protohdr) ;
714 if ( shared || m0->m_len < i) {
715 m0 = m_pullup(m0, i) ;
716 if (m0 == NULL) {
717 printf("-- bdg: pullup failed.\n") ;
718 return NULL ;
719 }
1c79356b 720 }
9bccf70c 721
1c79356b
A
722 /*
723 * before calling the firewall, swap fields the same as IP does.
724 * here we assume the pkt is an IP one and the header is contiguous
725 */
9bccf70c 726 ip = mtod(m0, struct ip *);
1c79356b 727 NTOHS(ip->ip_len);
1c79356b
A
728 NTOHS(ip->ip_off);
729
730 /*
9bccf70c 731 * The third parameter to the firewall code is the dst. interface.
1c79356b 732 * Since we apply checks only on input pkts we use NULL.
9bccf70c
A
733 * The firewall knows this is a bridged packet as the cookie ptr
734 * is NULL.
1c79356b 735 */
9bccf70c
A
736 i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL);
737 if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */
738 return m0 ;
1c79356b 739 /*
9bccf70c
A
740 * If we get here, the firewall has passed the pkt, but the mbuf
741 * pointer might have changed. Restore ip and the fields NTOHS()'d.
1c79356b 742 */
9bccf70c 743 ip = mtod(m0, struct ip *);
1c79356b 744 HTONS(ip->ip_len);
1c79356b 745 HTONS(ip->ip_off);
9bccf70c
A
746
747 if (i == 0) /* a PASS rule. */
1c79356b 748 goto forward ;
9bccf70c
A
749#ifdef DUMMYNET
750 if (i & IP_FW_PORT_DYNT_FLAG) {
1c79356b 751 /*
9bccf70c
A
752 * Pass the pkt to dummynet, which consumes it.
753 * If shared, make a copy and keep the original.
754 * Need to prepend the ethernet header, optimize the common
755 * case of eh pointing already into the original mbuf.
1c79356b 756 */
9bccf70c
A
757 struct mbuf *m ;
758 if (shared) {
759 m = m_copypacket(m0, M_DONTWAIT);
760 if (m == NULL) {
761 printf("bdg_fwd: copy(1) failed\n");
762 return m0;
763 }
764 } else {
765 m = m0 ; /* pass the original to dummynet */
766 m0 = NULL ; /* and nothing back to the caller */
767 }
768 if ( (void *)(eh + 1) == (void *)m->m_data) {
769 m->m_data -= ETHER_HDR_LEN ;
770 m->m_len += ETHER_HDR_LEN ;
771 m->m_pkthdr.len += ETHER_HDR_LEN ;
772 bdg_predict++;
773 } else {
774 M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
775 if (!m && verbose) printf("M_PREPEND failed\n");
776 if (m == NULL) /* nope... */
777 return m0 ;
778 bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
779 }
780 dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0);
781 return m0 ;
1c79356b
A
782 }
783#endif
9bccf70c
A
784 /*
785 * XXX add divert/forward actions...
786 */
1c79356b 787 /* if none of the above matches, we have to drop the pkt */
9bccf70c
A
788 bdg_ipfw_drops++ ;
789 printf("bdg_forward: No rules match, so dropping packet!\n");
790 return m0 ;
1c79356b
A
791 }
792forward:
9bccf70c
A
793#endif /* IPFIREWALL */
794 /*
795 * Again, bring up the headers in case of shared bufs to avoid
796 * corruptions in the future.
797 */
798 if ( shared ) {
799 int i = min(m0->m_pkthdr.len, max_protohdr) ;
800
801 m0 = m_pullup(m0, i) ;
802 if (m0 == NULL) {
803 printf("-- bdg: pullup2 failed.\n") ;
804 return NULL ;
805 }
806 }
807 /* now real_dst is used to determine the cluster where to forward */
808 if (src != NULL) /* pkt comes from ether_input */
809 real_dst = src ;
810 for (;;) {
811 if (last) { /* need to forward packet leftover from previous loop */
812 struct mbuf *m ;
813 if (shared == 0 && once ) { /* no need to copy */
814 m = m0 ;
815 m0 = NULL ; /* original is gone */
816 } else {
817 m = m_copypacket(m0, M_DONTWAIT);
1c79356b 818 if (m == NULL) {
9bccf70c
A
819 printf("bdg_forward: sorry, m_copypacket failed!\n");
820 return m0 ; /* the original is still there... */
1c79356b
A
821 }
822 }
823 /*
9bccf70c
A
824 * Add header (optimized for the common case of eh pointing
825 * already into the mbuf) and execute last part of ether_output:
826 * queue pkt and start output if interface not yet active.
1c79356b 827 */
9bccf70c
A
828 if ( (void *)(eh + 1) == (void *)m->m_data) {
829 m->m_data -= ETHER_HDR_LEN ;
830 m->m_len += ETHER_HDR_LEN ;
831 m->m_pkthdr.len += ETHER_HDR_LEN ;
832 bdg_predict++;
833 } else {
834 M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
835 if (!m && verbose) printf("M_PREPEND failed\n");
836 if (m == NULL)
837 return m0;
838 bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
839 }
1c79356b 840 s = splimp();
9bccf70c
A
841 if (IF_QFULL(&last->if_snd)) {
842 IF_DROP(&last->if_snd);
843#if 0
844 BDG_MUTE(last); /* should I also mute ? */
845#endif
1c79356b 846 splx(s);
9bccf70c 847 m_freem(m); /* consume the pkt anyways */
1c79356b 848 } else {
9bccf70c 849 last->if_obytes += m->m_pkthdr.len ;
1c79356b 850 if (m->m_flags & M_MCAST)
9bccf70c
A
851 last->if_omcasts++;
852 if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */
853 bdg_split_pkts++;
854
855 IF_ENQUEUE(&last->if_snd, m);
856 if ((last->if_flags & IFF_OACTIVE) == 0)
857 (*last->if_start)(last);
1c79356b 858 splx(s);
1c79356b 859 }
9bccf70c
A
860 BDG_STAT(last, BDG_OUT);
861 last = NULL ;
862 if (once)
863 break ;
1c79356b 864 }
9bccf70c 865 if (ifp == NULL)
1c79356b 866 break ;
9bccf70c
A
867 /*
868 * If the interface is used for bridging, not muted, not full,
869 * up and running, is not the source interface, and belongs to
870 * the same cluster as the 'real_dst', then send here.
871 */
872 if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) &&
873 (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
874 ifp != src && BDG_SAMECLUSTER(ifp, real_dst) )
875 last = ifp ;
876 ifp = TAILQ_NEXT(ifp, if_link) ;
877 if (ifp == NULL)
878 once = 1 ;
1c79356b 879 }
9bccf70c
A
880 DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;
881 if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
882 return m0 ;
1c79356b 883}