]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/bridge.c
xnu-1456.1.26.tar.gz
[apple/xnu.git] / bsd / net / bridge.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1998 Luigi Rizzo
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $
53 */
54
55 /*
56 * This code implements bridging in FreeBSD. It only acts on ethernet
57 * type of interfaces (others are still usable for routing).
58 * A bridging table holds the source MAC address/dest. interface for each
59 * known node. The table is indexed using a hash of the source address.
60 *
61 * Input packets are tapped near the beginning of ether_input(), and
62 * analysed by calling bridge_in(). Depending on the result, the packet
63 * can be forwarded to one or more output interfaces using bdg_forward(),
64 * and/or sent to the upper layer (e.g. in case of multicast).
65 *
66 * Output packets are intercepted near the end of ether_output(),
67 * the correct destination is selected calling bridge_dst_lookup(),
68 * and then forwarding is done using bdg_forward().
69 * Bridging is controlled by the sysctl variable net.link.ether.bridge
70 *
71 * The arp code is also modified to let a machine answer to requests
72 * irrespective of the port the request came from.
73 *
74 * In case of loops in the bridging topology, the bridge detects this
75 * event and temporarily mutes output bridging on one of the ports.
76 * Periodically, interfaces are unmuted by bdg_timeout().
77 * Muting is only implemented as a safety measure, and also as
78 * a mechanism to support a user-space implementation of the spanning
79 * tree algorithm. In the final release, unmuting will only occur
80 * because of explicit action of the user-level daemon.
81 *
82 * To build a bridging kernel, use the following option
83 * option BRIDGE
84 * and then at runtime set the sysctl variable to enable bridging.
85 *
86 * Only one interface is supposed to have addresses set (but
87 * there are no problems in practice if you set addresses for more
88 * than one interface).
89 * Bridging will act before routing, but nothing prevents a machine
90 * from doing both (modulo bugs in the implementation...).
91 *
92 * THINGS TO REMEMBER
93 * - bridging is incompatible with multicast routing on the same
94 * machine. There is not an easy fix to this.
95 * - loop detection is still not very robust.
96 * - the interface of bdg_forward() could be improved.
97 */
98
99 #include <sys/param.h>
100 #include <sys/mbuf.h>
101 #include <sys/malloc.h>
102 #include <sys/systm.h>
103 #include <sys/socket.h> /* for net/if.h */
104 #include <sys/kernel.h>
105 #include <sys/sysctl.h>
106
107 #include <net/if.h>
108 #include <net/if_types.h>
109
110 #include <netinet/in.h> /* for struct arpcom */
111 #include <netinet/in_systm.h>
112 #include <netinet/in_var.h>
113 #include <netinet/ip.h>
114 #include <netinet/if_ether.h> /* for struct arpcom */
115
116 #include "opt_ipfw.h"
117 #include "opt_ipdn.h"
118
119 #if defined(IPFIREWALL)
120 #include <net/route.h>
121 #include <netinet/ip_fw.h>
122 #if defined(DUMMYNET)
123 #include <netinet/ip_dummynet.h>
124 #endif
125 #endif
126
127 #include <net/bridge.h>
128
129 /*
130 * For debugging, you can use the following macros.
131 * remember, rdtsc() only works on Pentium-class machines
132
133 quad_t ticks;
134 DDB(ticks = rdtsc();)
135 ... interesting code ...
136 DDB(bdg_fw_ticks += (u_int32_t)(rdtsc() - ticks) ; bdg_fw_count++ ;)
137
138 *
139 */
140
/*
 * DDB(x) keeps its argument in the build, DEB(x) discards it; swap the two
 * definitions to turn the DEB() debug printfs in this file on.
 */
141 #define DDB(x) x
142 #define DEB(x)
143
/* Forward declarations of file-local helpers defined further down. */
144 static void bdginit(void *);
145 static void bdgtakeifaces(void);
146 static void flush_table(void);
147 static void bdg_promisc_on(void);
148 static void parse_bdg_cfg(void);
149
/* Non-zero: bdg_forward() also runs bridged packets through ip_fw_chk_ptr. */
150 static int bdg_ipfw = 0 ;
/* Master on/off switch, written through the sysctl_bdg() handler. */
151 int do_bridge = 0;
/* Forwarding table of HASH_SIZE entries, allocated lazily by bdg_init2(). */
152 bdg_hash_table *bdg_table = NULL ;
153
154 /*
155 * System initialization
156 */
157
/* Hooks bdginit() into system start-up (presumably run once at boot -- confirm SYSINIT semantics on this platform). */
158 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL)
159
/* Per-port statistics, exported read-only through the bdgstats sysctl. */
160 static struct bdg_stats bdg_stats ;
/*
 * Per-interface bridge state, indexed by ifp->if_index.  Allocated in
 * bdg_init2() with BDG_MAX_PORTS entries; NULL until that succeeds.
 */
161 struct bdg_softc *ifp2sc = NULL ;
162 /* XXX make it static of size BDG_MAX_PORTS */
163
/*
 * IFP_CHK(ifp, x): execute statement(s) `x' when the bridge state slot of
 * `ifp' does not carry the 0xDEADBEEF marker that bdgtakeifaces() stores in
 * every slot it fills in.
 *
 * Wrapped in do { } while (0) so the macro behaves like a single statement:
 * the bare `if' form would capture a following `else' (or fail to compile
 * when used as the body of an if/else).  `ifp' is parenthesised so that any
 * pointer expression can be passed.
 */
#define IFP_CHK(ifp, x) \
	do { \
		if (ifp2sc[(ifp)->if_index].magic != 0xDEADBEEF) { x ; } \
	} while (0)
166
167 /*
168 * turn off promisc mode, optionally clear the IFF_USED flag.
169 * The flag is turned on by parse_bdg_config
170 */
171 static void
172 bdg_promisc_off(int clear_used)
173 {
174 struct ifnet *ifp ;
175 ifnet_head_lock_shared();
176 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
177 if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
178 int s, ret ;
179 s = splimp();
180 ret = ifnet_set_promiscuous(ifp, 0);
181 splx(s);
182 ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ;
183 DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n",
184 ifp->if_name, ifp->if_unit,
185 ifp->if_flags, ifp2sc[ifp->if_index].flags);)
186 }
187 if (clear_used) {
188 ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ;
189 bdg_stats.s[ifp->if_index].name[0] = '\0';
190 }
191 }
192 ifnet_head_done();
193 }
194
195 /*
196 * set promisc mode on the interfaces we use.
197 */
198 static void
199 bdg_promisc_on()
200 {
201 struct ifnet *ifp ;
202 int s ;
203
204 ifnet_head_lock_shared();
205 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
206 if ( !BDG_USED(ifp) )
207 continue ;
208 if ( 0 == ( ifp->if_flags & IFF_UP) ) {
209 s = splimp();
210 if_up(ifp);
211 splx(s);
212 }
213 if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
214 int ret ;
215 s = splimp();
216 ret = ifnet_set_promiscuous(ifp, 1);
217 splx(s);
218 ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ;
219 printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n",
220 ifp->if_name, ifp->if_unit,
221 ifp->if_flags, ifp2sc[ifp->if_index].flags);
222 }
223 if (BDG_MUTED(ifp)) {
224 printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);
225 BDG_UNMUTE(ifp) ;
226 }
227 }
228 ifnet_head_done();
229 }
230
231 static int
232 sysctl_bdg(SYSCTL_HANDLER_ARGS)
233 {
234 int error, oldval = do_bridge ;
235
236 error = sysctl_handle_int(oidp,
237 oidp->oid_arg1, oidp->oid_arg2, req);
238 DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
239 oidp->oid_name, oidp->oid_arg2,
240 oldval, do_bridge); )
241
242 if (bdg_table == NULL)
243 do_bridge = 0 ;
244 if (oldval != do_bridge) {
245 bdg_promisc_off( 1 ); /* reset previously used interfaces */
246 flush_table();
247 if (do_bridge) {
248 parse_bdg_cfg();
249 bdg_promisc_on();
250 }
251 }
252 return error ;
253 }
254
255 static char bridge_cfg[256] = { "" } ;
256
257 /*
258 * parse the config string, set IFF_USED, name and cluster_id
259 * for all interfaces found.
260 */
261 static void
262 parse_bdg_cfg()
263 {
264 char *p, *beg ;
265 int i, l, cluster;
266 struct bdg_softc *b;
267
268 for (p= bridge_cfg; *p ; p++) {
269 /* interface names begin with [a-z] and continue up to ':' */
270 if (*p < 'a' || *p > 'z')
271 continue ;
272 for ( beg = p ; *p && *p != ':' ; p++ )
273 ;
274 if (*p == 0) /* end of string, ':' not found */
275 return ;
276 l = p - beg ; /* length of name string */
277 p++ ;
278 DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);)
279 for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++)
280 cluster = cluster*10 + (*p -'0');
281 /*
282 * now search in bridge strings
283 */
284 for (i=0, b = ifp2sc ; i < if_index ; i++, b++) {
285 char buf[32];
286 struct ifnet *ifp = b->ifp ;
287
288 if (ifp == NULL)
289 continue;
290 sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit);
291 if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */
292 b->cluster_id = htons(cluster) ;
293 b->flags |= IFF_USED ;
294 sprintf(bdg_stats.s[ifp->if_index].name,
295 "%s%d:%d", ifp->if_name, ifp->if_unit, cluster);
296
297 DEB(printf("--++ found %s\n",
298 bdg_stats.s[ifp->if_index].name);)
299 break ;
300 }
301 }
302 if (*p == '\0')
303 break ;
304 }
305 }
306
307 static int
308 sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS)
309 {
310 int error = 0 ;
311 char oldval[256] ;
312
313 strlcpy(oldval, bridge_cfg, sizeof (oldval));
314
315 error = sysctl_handle_string(oidp,
316 bridge_cfg, oidp->oid_arg2, req);
317 DEB(
318 printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n",
319 oidp->oid_name, oidp->oid_arg2,
320 error,
321 oldval, bridge_cfg);
322 )
323 if (strcmp(oldval, bridge_cfg)) {
324 bdg_promisc_off( 1 ); /* reset previously-used interfaces */
325 flush_table();
326 parse_bdg_cfg(); /* and set new ones... */
327 if (do_bridge)
328 bdg_promisc_on(); /* re-enable interfaces */
329 }
330 return error ;
331 }
332
333 static int
334 sysctl_refresh(SYSCTL_HANDLER_ARGS)
335 {
336 if (req->newptr)
337 bdgtakeifaces();
338
339 return 0;
340 }
341
342
/* sysctl nodes of the bridge, all attached under _net_link_ether. */
343 SYSCTL_DECL(_net_link_ether);
/* Configuration string; writes are handled by sysctl_bdg_cfg(). */
344 SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW,
345 &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
346 "Bridge configuration");
347
/* Bridging on/off switch (do_bridge); writes are handled by sysctl_bdg(). */
348 SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
349 &do_bridge, 0, &sysctl_bdg, "I", "Bridging");
350
351 SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
352 &bdg_ipfw,0,"Pass bridged pkts through firewall");
353
/* SY(parent, var, comment): define a static int `var' and export it read/write under `parent'. */
354 #define SY(parent, var, comment) \
355 static int var ; \
356 SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment);
357
/* Incremented in bdg_forward() when the firewall result matches no handled action. */
358 int bdg_ipfw_drops;
359 SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop,
360 CTLFLAG_RW, &bdg_ipfw_drops,0,"");
361
/* Incremented in bridge_in() when a table slot holds a different MAC address (hash collision). */
362 int bdg_ipfw_colls;
363 SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions,
364 CTLFLAG_RW, &bdg_ipfw_colls,0,"");
365
/* Write-only trigger; sysctl_refresh() re-scans the interfaces on any write. */
366 SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR,
367 NULL, 0, &sysctl_refresh, "I", "iface refresh");
368
369 #if 1 /* diagnostic vars */
370
371 SY(_net_link_ether, verbose, "Be verbose");
372 SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward");
373
374 SY(_net_link_ether, bdg_thru, "Packets through bridge");
375
376 SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward");
377
378 SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward");
379 SY(_net_link_ether, bdg_predict, "Correctly predicted header location");
380
381 SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg");
382 SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item");
383 SY(_net_link_ether, bdg_fw_count, "Cycle counter count");
384 #endif
385
/* Whole statistics structure, read-only. */
386 SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
387 CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");
388
/* Loop events counted by bridge_in(); reset by bdg_timeout() each time it unmutes the ports. */
389 static int bdg_loops ;
390
391 /*
392 * completely flush the bridge table.
393 */
394 static void
395 flush_table()
396 {
397 int s,i;
398
399 if (bdg_table == NULL)
400 return ;
401 s = splimp();
402 for (i=0; i< HASH_SIZE; i++)
403 bdg_table[i].name= NULL; /* clear table */
404 splx(s);
405 }
406
407 /*
408 * called periodically to flush entries etc.
409 */
410 static void
411 bdg_timeout(void *dummy)
412 {
413 static int slowtimer = 0 ;
414
415 if (bdg_inted == 0) {
416 bdg_init2(0);
417 } else if (do_bridge) {
418 static int age_index = 0 ; /* index of table position to age */
419 int l = age_index + HASH_SIZE/4 ;
420 /*
421 * age entries in the forwarding table.
422 */
423 if (l > HASH_SIZE)
424 l = HASH_SIZE ;
425 for (; age_index < l ; age_index++)
426 if (bdg_table[age_index].used)
427 bdg_table[age_index].used = 0 ;
428 else if (bdg_table[age_index].name) {
429 /* printf("xx flushing stale entry %d\n", age_index); */
430 bdg_table[age_index].name = NULL ;
431 }
432 if (age_index >= HASH_SIZE)
433 age_index = 0 ;
434
435 if (--slowtimer <= 0 ) {
436 slowtimer = 5 ;
437
438 bdg_promisc_on() ; /* we just need unmute, really */
439 bdg_loops = 0 ;
440 }
441 }
442 timeout(bdg_timeout, (void *)0, 2*hz );
443 }
444
445 /*
446 * local MAC addresses are held in a small array. This makes comparisons
447 * much faster.
448 */
449 bdg_addr bdg_addresses[BDG_MAX_PORTS];
450 int bdg_ports ;
451
452 /*
453 * initialization of bridge code. This needs to be done after all
454 * interfaces have been configured.
455 */
456
457 static int bdg_inited = 0;
458
459 static void
460 bdg_init2(void)
461 {
462 if (bdg_inited != 0)
463 return;
464
465 if (bdg_table == NULL) {
466 bdg_table = (struct hash_table *)
467 _MALLOC(HASH_SIZE * sizeof(struct hash_table),
468 M_IFADDR, M_WAITOK);
469 if (bdg_table == NULL)
470 return;
471
472 flush_table();
473 }
474
475 if (ifp2sc == NULL) {
476 ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc),
477 M_IFADDR, M_WAITOK );
478 if (ifp2sc == NULL)
479 return;
480
481 bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) );
482 bdgtakeifaces();
483 }
484
485 bdg_inited = 1;
486 }
487
488 static void
489 bdginit(void *dummy)
490 {
491 /* Initialize first what can't fail */
492 bzero(&bdg_stats, sizeof(bdg_stats) );
493 do_bridge=0;
494
495 /* Attempt to initialize the rest and start the timer */
496 bdg_timeout(0);
497 }
498
499 void
500 bdgtakeifaces(void)
501 {
502 int i ;
503 struct ifnet *ifp;
504 bdg_addr *p = bdg_addresses ;
505 struct bdg_softc *bp;
506
507 bdg_ports = 0 ;
508 *bridge_cfg = '\0';
509
510 printf("BRIDGE 010131, have %d interfaces\n", if_index);
511 ifnet_head_lock_shared();
512 for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ;
513 i++, ifp = TAILQ_NEXT(ifp, if_link) )
514 if (ifp->if_type == IFT_ETHER) { /* ethernet ? */
515 ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN);
516 bp = &ifp2sc[ifp->if_index] ;
517 sprintf(bridge_cfg + strlen(bridge_cfg),
518 "%s%d:1,", ifp->if_name, ifp->if_unit);
519 printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
520 ifp->if_index,
521 bdg_stats.s[ifp->if_index].name,
522 (int)ifp->if_type, (int) ifp->if_physical,
523 (int)ifp->if_addrlen,
524 p->etheraddr, "." );
525 p++ ;
526 bp->ifp = ifp ;
527 bp->flags = IFF_USED ;
528 bp->cluster_id = htons(1) ;
529 bp->magic = 0xDEADBEEF ;
530
531 sprintf(bdg_stats.s[ifp->if_index].name,
532 "%s%d:%d", ifp->if_name, ifp->if_unit,
533 ntohs(bp->cluster_id));
534 bdg_ports ++ ;
535 }
536 ifnet_head_done();
537 }
538
539 /*
540 * bridge_in() is invoked to perform bridging decision on input packets.
541 *
542 * On Input:
543 * eh Ethernet header of the incoming packet.
544 *
545 * On Return: destination of packet, one of
546 * BDG_BCAST broadcast
547 * BDG_MCAST multicast
548 * BDG_LOCAL is only for a local address (do not forward)
549 * BDG_DROP drop the packet
550 * ifp ifp of the destination interface.
551 *
552 * Forwarding is not done directly to give a chance to some drivers
553 * to fetch more of the packet, or simply drop it completely.
554 */
555
556 struct ifnet *
557 bridge_in(struct ifnet *ifp, struct ether_header *eh)
558 {
559 int index;
560 struct ifnet *dst , *old ;
561 int dropit = BDG_MUTED(ifp) ;
562
563 /*
564 * hash the source address
565 */
566 index= HASH_FN(eh->ether_shost);
567 bdg_table[index].used = 1 ;
568 old = bdg_table[index].name ;
569 if ( old ) { /* the entry is valid. */
570 IFP_CHK(old, printf("bridge_in-- reading table\n") );
571
572 if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) {
573 bdg_ipfw_colls++ ;
574 bdg_table[index].name = NULL ;
575 } else if (old != ifp) {
576 /*
577 * found a loop. Either a machine has moved, or there
578 * is a misconfiguration/reconfiguration of the network.
579 * First, do not forward this packet!
580 * Record the relocation anyways; then, if loops persist,
581 * suspect a reconfiguration and disable forwarding
582 * from the old interface.
583 */
584 bdg_table[index].name = ifp ; /* relocate address */
585 printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
586 bdg_loops, eh->ether_shost, ".",
587 ifp->if_name, ifp->if_unit,
588 old->if_name, old->if_unit,
589 BDG_MUTED(old) ? "muted":"active");
590 dropit = 1 ;
591 if ( !BDG_MUTED(old) ) {
592 if (++bdg_loops > 10)
593 BDG_MUTE(old) ;
594 }
595 }
596 }
597
598 /*
599 * now write the source address into the table
600 */
601 if (bdg_table[index].name == NULL) {
602 DEB(printf("new addr %6D at %d for %s%d\n",
603 eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
604 bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6);
605 bdg_table[index].name = ifp ;
606 }
607 dst = bridge_dst_lookup(eh);
608 /* Return values:
609 * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
610 * For muted interfaces, the first 3 are changed in BDG_LOCAL,
611 * and others to BDG_DROP. Also, for incoming packets, ifp is changed
612 * to BDG_DROP in case ifp == src . These mods are not necessary
613 * for outgoing packets from ether_output().
614 */
615 BDG_STAT(ifp, BDG_IN);
616 switch ((int)dst) {
617 case (int)BDG_BCAST:
618 case (int)BDG_MCAST:
619 case (int)BDG_LOCAL:
620 case (int)BDG_UNKNOWN:
621 case (int)BDG_DROP:
622 BDG_STAT(ifp, dst);
623 break ;
624 default :
625 if (dst == ifp || dropit )
626 BDG_STAT(ifp, BDG_DROP);
627 else
628 BDG_STAT(ifp, BDG_FORWARD);
629 break ;
630 }
631
632 if ( dropit ) {
633 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
634 return BDG_LOCAL ;
635 else
636 return BDG_DROP ;
637 } else {
638 return (dst == ifp ? BDG_DROP : dst ) ;
639 }
640 }
641
642 /*
643 * Forward to dst, excluding src port and muted interfaces.
644 * If src == NULL, the pkt comes from ether_output, and dst is the real
645 * interface the packet is originally sent to. In this case we must forward
646 * it to the whole cluster. We never call bdg_forward ether_output on
647 * interfaces which are not part of a cluster.
648 *
649 * The packet is freed if possible (i.e. surely not of interest for
650 * the upper layer), otherwise a copy is left for use by the caller
651 * (pointer in m0).
652 *
653 * It would be more efficient to make bdg_forward() always consume
654 * the packet, leaving to the caller the task to check if it needs a copy
655 * and get one in case. As it is now, bdg_forward() can sometimes make
656 * a copy whereas it is not necessary.
657 *
658 * XXX be careful about eh, it can be a pointer into *m
659 */
660 struct mbuf *
661 bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst)
662 {
663 struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */
664 struct ifnet *ifp, *last = NULL ;
665 int s ;
666 int shared = bdg_copy ; /* someone else is using the mbuf */
667 int once = 0; /* loop only once */
668 struct ifnet *real_dst = dst ; /* real dst from ether_output */
669 #ifdef IPFIREWALL
670 struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */
671 #endif
672
673 /*
674 * XXX eh is usually a pointer within the mbuf (some ethernet drivers
675 * do that), so we better copy it before doing anything with the mbuf,
676 * or we might corrupt the header.
677 */
678 struct ether_header save_eh = *eh ;
679
680 #if defined(IPFIREWALL) && defined(DUMMYNET)
681 if (m0->m_type == MT_DUMMYNET) {
682 /* extract info from dummynet header */
683 rule = (struct ip_fw_chain *)(m0->m_data) ;
684 m0 = m0->m_next ;
685 src = m0->m_pkthdr.rcvif;
686 shared = 0 ; /* For sure this is our own mbuf. */
687 } else
688 #endif
689 bdg_thru++; /* only count once */
690
691 if (src == NULL) /* packet from ether_output */
692 dst = bridge_dst_lookup(eh);
693 if (dst == BDG_DROP) { /* this should not happen */
694 printf("xx bdg_forward for BDG_DROP\n");
695 m_freem(m0);
696 return NULL;
697 }
698 if (dst == BDG_LOCAL) { /* this should not happen as well */
699 printf("xx ouch, bdg_forward for local pkt\n");
700 return m0;
701 }
702 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
703 ifp = ifnet_head.tqh_first ; /* scan all ports */
704 once = 0 ;
705 if (dst != BDG_UNKNOWN) /* need a copy for the local stack */
706 shared = 1 ;
707 } else {
708 ifp = dst ;
709 once = 1 ;
710 }
711 if ( (u_int)(ifp) <= (u_int)BDG_FORWARD )
712 panic("bdg_forward: bad dst");
713
714 #ifdef IPFIREWALL
715 /*
716 * Do filtering in a very similar way to what is done in ip_output.
717 * Only if firewall is loaded, enabled, and the packet is not
718 * from ether_output() (src==NULL, or we would filter it twice).
719 * Additional restrictions may apply e.g. non-IP, short packets,
720 * and pkts already gone through a pipe.
721 */
722 if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) {
723 struct ip *ip ;
724 int i;
725
726 if (rule != NULL) /* dummynet packet, already partially processed */
727 goto forward; /* HACK! I should obey the fw_one_pass */
728 if (ntohs(save_eh.ether_type) != ETHERTYPE_IP)
729 goto forward ; /* not an IP packet, ipfw is not appropriate */
730 if (m0->m_pkthdr.len < sizeof(struct ip) )
731 goto forward ; /* header too short for an IP pkt, cannot filter */
732 /*
733 * i need some amt of data to be contiguous, and in case others need
734 * the packet (shared==1) also better be in the first mbuf.
735 */
736 i = min(m0->m_pkthdr.len, max_protohdr) ;
737 if ( shared || m0->m_len < i) {
738 m0 = m_pullup(m0, i) ;
739 if (m0 == NULL) {
740 printf("-- bdg: pullup failed.\n") ;
741 return NULL ;
742 }
743 }
744
745 /*
746 * before calling the firewall, swap fields the same as IP does.
747 * here we assume the pkt is an IP one and the header is contiguous
748 */
749 ip = mtod(m0, struct ip *);
750 NTOHS(ip->ip_len);
751 NTOHS(ip->ip_off);
752
753 /*
754 * The third parameter to the firewall code is the dst. interface.
755 * Since we apply checks only on input pkts we use NULL.
756 * The firewall knows this is a bridged packet as the cookie ptr
757 * is NULL.
758 */
759 i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL);
760 if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */
761 return m0 ;
762 /*
763 * If we get here, the firewall has passed the pkt, but the mbuf
764 * pointer might have changed. Restore ip and the fields NTOHS()'d.
765 */
766 ip = mtod(m0, struct ip *);
767 HTONS(ip->ip_len);
768 HTONS(ip->ip_off);
769
770 if (i == 0) /* a PASS rule. */
771 goto forward ;
772 #ifdef DUMMYNET
773 if (i & IP_FW_PORT_DYNT_FLAG) {
774 /*
775 * Pass the pkt to dummynet, which consumes it.
776 * If shared, make a copy and keep the original.
777 * Need to prepend the ethernet header, optimize the common
778 * case of eh pointing already into the original mbuf.
779 */
780 struct mbuf *m ;
781 if (shared) {
782 m = m_copypacket(m0, M_DONTWAIT);
783 if (m == NULL) {
784 printf("bdg_fwd: copy(1) failed\n");
785 return m0;
786 }
787 } else {
788 m = m0 ; /* pass the original to dummynet */
789 m0 = NULL ; /* and nothing back to the caller */
790 }
791 if ( (void *)(eh + 1) == (void *)m->m_data) {
792 m->m_data -= ETHER_HDR_LEN ;
793 m->m_len += ETHER_HDR_LEN ;
794 m->m_pkthdr.len += ETHER_HDR_LEN ;
795 bdg_predict++;
796 } else {
797 M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
798 if (!m && verbose) printf("M_PREPEND failed\n");
799 if (m == NULL) /* nope... */
800 return m0 ;
801 bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
802 }
803 dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0);
804 return m0 ;
805 }
806 #endif
807 /*
808 * XXX add divert/forward actions...
809 */
810 /* if none of the above matches, we have to drop the pkt */
811 bdg_ipfw_drops++ ;
812 printf("bdg_forward: No rules match, so dropping packet!\n");
813 return m0 ;
814 }
815 forward:
816 #endif /* IPFIREWALL */
817 /*
818 * Again, bring up the headers in case of shared bufs to avoid
819 * corruptions in the future.
820 */
821 if ( shared ) {
822 int i = min(m0->m_pkthdr.len, max_protohdr) ;
823
824 m0 = m_pullup(m0, i) ;
825 if (m0 == NULL) {
826 printf("-- bdg: pullup2 failed.\n") ;
827 return NULL ;
828 }
829 }
830 /* now real_dst is used to determine the cluster where to forward */
831 if (src != NULL) /* pkt comes from ether_input */
832 real_dst = src ;
833 for (;;) {
834 if (last) { /* need to forward packet leftover from previous loop */
835 struct mbuf *m ;
836 if (shared == 0 && once ) { /* no need to copy */
837 m = m0 ;
838 m0 = NULL ; /* original is gone */
839 } else {
840 m = m_copypacket(m0, M_DONTWAIT);
841 if (m == NULL) {
842 printf("bdg_forward: sorry, m_copypacket failed!\n");
843 return m0 ; /* the original is still there... */
844 }
845 }
846 /*
847 * Add header (optimized for the common case of eh pointing
848 * already into the mbuf) and execute last part of ether_output:
849 * queue pkt and start output if interface not yet active.
850 */
851 if ( (void *)(eh + 1) == (void *)m->m_data) {
852 m->m_data -= ETHER_HDR_LEN ;
853 m->m_len += ETHER_HDR_LEN ;
854 m->m_pkthdr.len += ETHER_HDR_LEN ;
855 bdg_predict++;
856 } else {
857 M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
858 if (!m && verbose) printf("M_PREPEND failed\n");
859 if (m == NULL)
860 return m0;
861 bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
862 }
863 s = splimp();
864 if (IF_QFULL(&last->if_snd)) {
865 IF_DROP(&last->if_snd);
866 #if 0
867 BDG_MUTE(last); /* should I also mute ? */
868 #endif
869 splx(s);
870 m_freem(m); /* consume the pkt anyways */
871 } else {
872 last->if_obytes += m->m_pkthdr.len ;
873 if (m->m_flags & M_MCAST)
874 last->if_omcasts++;
875 if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */
876 bdg_split_pkts++;
877
878 IF_ENQUEUE(&last->if_snd, m);
879 if ((last->if_flags & IFF_OACTIVE) == 0)
880 (*last->if_start)(last);
881 splx(s);
882 }
883 BDG_STAT(last, BDG_OUT);
884 last = NULL ;
885 if (once)
886 break ;
887 }
888 if (ifp == NULL)
889 break ;
890 /*
891 * If the interface is used for bridging, not muted, not full,
892 * up and running, is not the source interface, and belongs to
893 * the same cluster as the 'real_dst', then send here.
894 */
895 if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) &&
896 (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
897 ifp != src && BDG_SAMECLUSTER(ifp, real_dst) )
898 last = ifp ;
899 ifp = TAILQ_NEXT(ifp, if_link) ;
900 if (ifp == NULL)
901 once = 1 ;
902 }
903 DEB(bdg_fw_ticks += (u_int32_t)(rdtsc() - ticks) ; bdg_fw_count++ ;
904 if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
905 return m0 ;
906 }